Spaces:

meesamraza
/

Programming_Developer_Advisor_Chatbot

Sleeping

File size: 17,191 Bytes

d915eee
 
58b9e2b
 
 
d68ff83
d915eee
58b9e2b
ad5e7c5
 
5fe15a6
 
58b9e2b
d915eee
 
 
58b9e2b
 
d915eee
 
 
58b9e2b
 
 
 
 
 
 
 
d915eee
58b9e2b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ad5e7c5
58b9e2b
 
 
 
 
5fe15a6
58b9e2b
 
d915eee
5fe15a6
 
 
 
 
 
 
 
 
 
 
 
d915eee
58b9e2b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d915eee
58b9e2b
 
 
 
 
 
 
 
 
 
5fe15a6
 
 
d915eee
 
5fe15a6
 
d915eee
5fe15a6
58b9e2b
 
 
 
 
 
5fe15a6
 
 
08231ff
ad5e7c5
58b9e2b
 
5fe15a6
58b9e2b
 
 
 
 
 
 
 
5fe15a6
58b9e2b
5fe15a6
 
 
 
 
 
 
58b9e2b
 
5fe15a6
 
 
 
58b9e2b
d915eee
 
58b9e2b
 
 
 
 
 
 
 
d915eee
58b9e2b
 
 
 
 
 
 
 
5fe15a6
 
d915eee
58b9e2b
5fe15a6
58b9e2b
d915eee
58b9e2b
 
 
 
 
 
 
 
 
5fe15a6
 
 
58b9e2b
 
 
 
5fe15a6
 
58b9e2b
 
 
 
 
 
 
 
 
 
 
 
d915eee
58b9e2b
 
 
 
5fe15a6
 
 
 
 
 
58b9e2b
 
 
 
 
 
 
 
 
5fe15a6
58b9e2b
 
5fe15a6
 
 
58b9e2b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d915eee
58b9e2b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d915eee
58b9e2b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ad5e7c5
58b9e2b
 
 
 
 
2b674bb
58b9e2b
 
 
 
 
 
 
ad5e7c5
58b9e2b

# src/streamlit_app.py

import streamlit as st
import pandas as pd
import io
import os
import fitz
import docx2txt
from groq import Groq
from dotenv import load_dotenv
from pydantic import BaseModel, Field, ValidationError # Added ValidationError
from typing import Optional, List # Added Optional and List

# --- 0. FIX: SET PAGE CONFIG AS THE FIRST STREAMLIT COMMAND ---
st.set_page_config(layout="wide", page_title="Quantum Scrutiny Platform | Groq-Powered")

# Load variables from a local .env file (if one exists) so the lookups below
# also work outside Hugging Face. By default load_dotenv() does not override
# variables that are already set, so Hugging Face Secrets keep priority.
load_dotenv()

# FIX for Hugging Face Deployment: Read the key from the environment/Secrets.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")

# Admin password. Can be overridden with the ADMIN_PASSWORD environment
# variable / Secret; an unset or empty value falls back to the original
# default so existing deployments keep working.
# NOTE(review): the fallback is a well-known weak password - set the secret
# for any public deployment.
ADMIN_PASSWORD = os.getenv("ADMIN_PASSWORD") or "admin"

# Initialize Groq Client. Without a usable client nothing else in the app can
# work, so the script is halted with a visible error in either failure case.
if GROQ_API_KEY:
    try:
        groq_client = Groq(api_key=GROQ_API_KEY)
    except Exception as e:
        st.error(f"Error initializing Groq Client: {e}")
        st.stop()
else:
    st.error("GROQ_API_KEY not found. Please ensure the key is set as a Secret in Hugging Face or in the local .env file.")
    st.stop()

# Initialize Session State (only on the first run of a session; later reruns
# find the keys already present and keep their values).
if 'is_admin_logged_in' not in st.session_state:
    st.session_state.is_admin_logged_in = False
if 'analyzed_data' not in st.session_state:
    # Column order here is the column order of the dashboard table and export.
    initial_cols = [
        'Name', 'Job Role', 'Resume Score (100)', 'Email', 'Phone', 'Shortlisted',
        'Experience Summary', 'Education Summary', 'Communication Rating (1-10)', 
        'Skills/Technologies', 'Certifications', 'ABA Skills (1-10)', 
        'RBT/BCBA Cert', 'Autism-Care Exp (1-10)'
    ]
    st.session_state.analyzed_data = pd.DataFrame(columns=initial_cols)


# --- 2. DATA STRUCTURE FOR GROQ OUTPUT (Pydantic Schema) ---

class ResumeAnalysis(BaseModel):
    """Pydantic model for structured resume data extraction."""

    # -- Contact details ---------------------------------------------------
    name: str = Field(
        description="Full name of the candidate.",
    )
    email: str = Field(
        description="Professional email address.",
    )
    phone: str = Field(
        description="Primary phone number.",
    )

    # -- Background --------------------------------------------------------
    certifications: List[str] = Field(
        description="List of professional certifications.",
    )
    experience_summary: str = Field(
        description="A concise summary of the candidate's professional experience.",
    )
    education_summary: str = Field(
        description="A concise summary of the candidate's highest education.",
    )

    # -- Skills ------------------------------------------------------------
    # The communication rating is deliberately typed as a string: the field
    # description asks the model for a quoted score such as '8', and the
    # scoring code parses the number out of that string later.
    communication_skills: str = Field(
        description="A score as a STRING (e.g., '8') or description of communication skills.",
    )
    technical_skills: List[str] = Field(
        description="List of technical skills/technologies mentioned.",
    )

    # -- Therapist-specific fields -------------------------------------------
    # Optional because the model may answer null for them; when the key is
    # omitted altogether the default "N/A" applies.
    aba_therapy_skills: Optional[str] = Field(
        default="N/A",
        description="Specific score as a STRING (e.g., '7'). Use 'N/A' if not applicable.",
    )
    rbt_bcba_certification: Optional[str] = Field(
        default="N/A",
        description="Indicate 'Yes' or 'No'. Use 'N/A' if not applicable.",
    )
    autism_care_experience_score: Optional[str] = Field(
        default="N/A",
        description="A score as a STRING (e.g., '9'). Use 'N/A' if not applicable.",
    )


# --- 3. HELPER FUNCTIONS ---

def extract_text_from_file(uploaded_file):
    """Extract plain text from an uploaded PDF or DOCX file.

    The file kind is taken from the object's ``type`` (MIME type) attribute.
    When that is missing or unrecognised, the extension of its ``name``
    attribute is used as a fallback.

    Args:
        uploaded_file: A file-like object (e.g. the objects returned by
            ``st.file_uploader``) exposing ``type`` and/or ``name``, plus
            ``read()``.

    Returns:
        The extracted text, or ``""`` when the file kind is unsupported or
        extraction fails for any reason (the error is printed, not raised).
    """
    pdf_mime = "application/pdf"
    docx_mime = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"

    mime_type = getattr(uploaded_file, "type", None) or ""
    file_name = str(getattr(uploaded_file, "name", None) or "").lower()

    # The MIME type wins; the extension is only consulted when the MIME type
    # does not identify a supported format.
    if mime_type == pdf_mime:
        kind = "pdf"
    elif mime_type == docx_mime:
        kind = "docx"
    elif file_name.endswith(".pdf"):
        kind = "pdf"
    elif file_name.endswith(".docx"):
        kind = "docx"
    else:
        return ""

    try:
        # Rewind first: if the stream was already consumed earlier, read()
        # would otherwise return nothing and extraction would silently fail.
        if hasattr(uploaded_file, "seek"):
            uploaded_file.seek(0)

        if kind == "pdf":
            with fitz.open(stream=uploaded_file.read(), filetype="pdf") as doc:
                return "".join(page.get_text() for page in doc)
        return docx2txt.process(uploaded_file)
    except Exception as e:
        # Best-effort by design: the caller treats "" as "skip this file".
        print(f"Error extracting text: {e}")
        return ""

# Keys of the Groq JSON payload, grouped by the shape ResumeAnalysis expects.
_GROQ_TEXT_FIELDS = ("name", "email", "phone", "experience_summary", "education_summary")
_GROQ_RATING_FIELDS = (
    "communication_skills",
    "aba_therapy_skills",
    "rbt_bcba_certification",
    "autism_care_experience_score",
)
_GROQ_LIST_FIELDS = ("certifications", "technical_skills")


def _failed_resume_analysis() -> ResumeAnalysis:
    """Build the placeholder result; callers detect it via name == 'Extraction Failed'."""
    return ResumeAnalysis(
        name="Extraction Failed",
        email="",
        phone="",
        certifications=[],
        experience_summary="",
        education_summary="",
        communication_skills="N/A",
        technical_skills=[],
        aba_therapy_skills="N/A",
        rbt_bcba_certification="N/A",
        autism_care_experience_score="N/A",
    )


def _normalise_groq_payload(payload: dict) -> dict:
    """Coerce common near-miss values in the model's JSON into the schema's types.

    Only keys that are present are touched, so a reply that omits a required
    field still fails validation instead of producing an empty candidate.
    """
    cleaned = dict(payload)

    for field in _GROQ_TEXT_FIELDS + _GROQ_RATING_FIELDS:
        if field not in cleaned:
            continue
        value = cleaned[field]
        if value is None:
            cleaned[field] = "N/A" if field in _GROQ_RATING_FIELDS else ""
        elif isinstance(value, (int, float)) and not isinstance(value, bool):
            # e.g. the model answers 8 instead of "8"
            cleaned[field] = str(value)

    for field in _GROQ_LIST_FIELDS:
        if field not in cleaned:
            continue
        value = cleaned[field]
        if value is None:
            cleaned[field] = []
        elif isinstance(value, str):
            # e.g. "Python, SQL" instead of ["Python", "SQL"]
            cleaned[field] = [item.strip() for item in value.split(",") if item.strip()]

    return cleaned


@st.cache_data(show_spinner="Analyzing resume with Groq...")
def _request_resume_analysis(resume_text: str, job_role: str, model: str) -> ResumeAnalysis:
    """Call Groq and parse the reply; raises on any failure so failures are never cached."""
    import json

    # Custom instructions for Therapist role
    if job_role == "Therapist":
        therapist_instructions = (
            "Because the job role is 'Therapist', you MUST carefully look for ABA Therapy Skills, RBT/BCBA Certification, and Autism-Care Experience. "
            "Provide a score from 1-10 as a **STRING** (e.g., '7') for the specialized fields. "
            "If any specialized therapist field is not found, you MUST return **null** or **N/A** for that field."
        )
    else:
        # For non-therapist roles, explicitly instruct the model to use 'null'
        # so Optional[str] handles it cleanly.
        therapist_instructions = (
            "Since the role is not 'Therapist', set 'aba_therapy_skills', 'autism_care_experience_score', and 'rbt_bcba_certification' to **null** or **N/A**."
        )

    # JSON mode only guarantees syntactically valid JSON, so the schema the
    # prompt refers to has to be spelled out in the prompt itself.
    schema_json = json.dumps(ResumeAnalysis.schema())

    system_prompt = (
        f"You are a professional Resume Analyzer. Your task is to extract specific information from the provided resume text. "
        f"The candidate is applying for the role of '{job_role}'. "
        f"Return a JSON object that strictly adheres to the provided Pydantic schema. "
        f"**CRITICAL:** Ensure 'communication_skills' is returned as a **STRING** value, even if it's a number (e.g., \"8\" NOT 8). "
        f"{therapist_instructions} "
        f"The JSON schema is:\n{schema_json}"
    )

    chat_completion = groq_client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": f"Analyze the following resume text:\n\n---\n{resume_text}\n---"}
        ],
        # JSON mode is requested through `response_format`.
        response_format={"type": "json_object"},
        temperature=0.0
    )

    raw_content = chat_completion.choices[0].message.content or ""
    try:
        payload = json.loads(raw_content)
        if not isinstance(payload, dict):
            raise ValueError("The model did not return a JSON object.")
        analysis = ResumeAnalysis(**_normalise_groq_payload(payload))
    except (ValidationError, ValueError):
        print(f"Failed JSON: {raw_content}")  # Print the bad JSON for debugging
        raise

    # Post-processing: Ensure Optional fields are strings for score calculation
    analysis.aba_therapy_skills = str(analysis.aba_therapy_skills or 'N/A')
    analysis.rbt_bcba_certification = str(analysis.rbt_bcba_certification or 'N/A')
    analysis.autism_care_experience_score = str(analysis.autism_care_experience_score or 'N/A')
    analysis.communication_skills = str(analysis.communication_skills)

    return analysis


def analyze_resume_with_groq(resume_text: str, job_role: str) -> ResumeAnalysis:
    """Uses Groq and the Pydantic schema for structured extraction.

    Successful analyses are cached per (resume text, job role, model).
    Failures are reported with ``st.error`` and are NOT cached, so retrying
    after a transient API error performs a fresh request.

    Args:
        resume_text: Plain text of the resume.
        job_role: Target role; 'Therapist' enables the specialised fields.

    Returns:
        The parsed ``ResumeAnalysis``, or a placeholder whose ``name`` is
        ``"Extraction Failed"`` when the request or validation failed.
    """
    # The model can be switched without a code change via GROQ_MODEL.
    # NOTE(review): confirm the default model is still served by Groq.
    model = os.getenv("GROQ_MODEL") or "mixtral-8x7b-32768"

    try:
        return _request_resume_analysis(resume_text, job_role, model)
    except (ValidationError, ValueError) as ve:
        # Covers schema violations as well as replies that are not valid JSON.
        st.error(f"Groq API Validation Error: The model returned incompatible data. Details: {ve}")
        return _failed_resume_analysis()
    except Exception as e:
        st.error(f"Groq API Error: {e}")
        return _failed_resume_analysis()


def _parse_rating(value, default: float) -> float:
    """Read the leading number of a rating such as '8', '8/10' or '8 - strong'.

    Returns ``default`` when the text does not start with a number; otherwise
    the number clamped to the 0-10 rating scale.
    """
    import re

    match = re.match(r"\s*(\d+(?:\.\d+)?|\.\d+)", str(value))
    if match is None:
        return default
    return max(0.0, min(float(match.group(1)), 10.0))


def calculate_resume_score(analysis: "ResumeAnalysis", job_role: Optional[str] = None) -> float:
    """Calculates the weighted score out of 100.

    Weights: experience summary length (40), number of technical skills (30),
    communication rating (20), number of certifications (10), plus up to 10
    bonus points for the 'Therapist' role. The total is capped at 100.

    Args:
        analysis: Object exposing the ResumeAnalysis fields used for scoring.
        job_role: Role to score for. When omitted, the role currently chosen
            in ``st.session_state['selected_role']`` is used.

    Returns:
        The score rounded to a whole number, as a float.
    """
    if job_role is None:
        job_role = st.session_state.get('selected_role')

    total_score = 0.0

    # 1. Experience Score (Max 40 points): saturates at 100 characters.
    exp_factor = min(len(analysis.experience_summary or "") / 100.0, 1.0)
    total_score += exp_factor * 40.0

    # 2. Skills Score (Max 30 points): saturates at 10 listed skills.
    skills_factor = min(len(analysis.technical_skills or []) / 10.0, 1.0)
    total_score += skills_factor * 30.0

    # 3. Communication Score (Max 20 points). An unparseable rating counts as
    # a neutral 5; clamping keeps a stray "50" from saturating the total.
    comm_rating = _parse_rating(analysis.communication_skills, default=5.0)
    total_score += (comm_rating / 10.0) * 20.0

    # 4. Certification Score (Max 10 points): one point per certification.
    total_score += min(len(analysis.certifications or []), 10) * 1.0

    # --- Therapist-Specific Bonus Checks ---
    if job_role == "Therapist":
        # Each rating is parsed independently, so one missing / 'N/A' value
        # counts as 0 without discarding the other.
        aba_score = _parse_rating(analysis.aba_therapy_skills, default=0.0)
        autism_score = _parse_rating(analysis.autism_care_experience_score, default=0.0)

        # Add a bonus based on the average specialized scores (max 10 points)
        total_score += ((aba_score + autism_score) / 20.0) * 10.0

    return float(round(min(total_score, 100)))


def append_analysis_to_dataframe(job_role: str, analysis: ResumeAnalysis, score: float):
    """Formats and appends the new analysis to the session state DataFrame.

    Args:
        job_role: Role the resume was analysed for.
        analysis: Extraction result for one resume.
        score: Resume score to store in the 'Resume Score (100)' column.

    Side effects:
        Replaces ``st.session_state.analyzed_data`` with a frame that has the
        new candidate appended as its last row ('Shortlisted' starts as 'No').
    """
    data = analysis.dict()

    def as_text(value) -> str:
        # Fields that might have been None / empty are shown as 'N/A'.
        return str(value or 'N/A')

    row = {
        'Name': data['name'],
        'Job Role': job_role,
        'Resume Score (100)': score,
        'Email': data['email'],
        'Phone': data['phone'],
        'Shortlisted': 'No',
        'Experience Summary': data['experience_summary'],
        'Education Summary': data['education_summary'],
        'Communication Rating (1-10)': as_text(data['communication_skills']),
        'Skills/Technologies': ", ".join(map(str, data['technical_skills'] or [])),
        'Certifications': ", ".join(map(str, data['certifications'] or [])),
        'ABA Skills (1-10)': as_text(data['aba_therapy_skills']),
        'RBT/BCBA Cert': as_text(data['rbt_bcba_certification']),
        'Autism-Care Exp (1-10)': as_text(data['autism_care_experience_score']),
    }
    new_df = pd.DataFrame([row])

    existing = st.session_state.analyzed_data
    if existing.empty:
        # Concatenating with an empty frame is deprecated in recent pandas and
        # can change the resulting dtypes; the first row simply becomes the
        # table. The row keys match the columns the table is initialised with.
        st.session_state.analyzed_data = new_df
    else:
        st.session_state.analyzed_data = pd.concat([existing, new_df], ignore_index=True)


# --- 4. APP LAYOUT AND LOGIC ---

st.title("🌌 Quantum Scrutiny Platform: AI Resume Analysis")

tab_user, tab_admin = st.tabs(["👤 Resume Uploader (User Panel)", "🔒 Admin Dashboard (Password Protected)"])

# =========================================================================
# A. Resume Upload (User Panel)
# =========================================================================
with tab_user:
    st.header("Upload Resumes for Analysis")
    st.info("Upload multiple PDF or DOCX files. The Groq AI engine will quickly extract and score the key data.")
    
    job_role_options = ["Software Engineer", "ML Engineer", "Therapist", "Data Analyst", "Project Manager"]
    # The widget key also publishes the choice as st.session_state['selected_role'].
    selected_role = st.selectbox(
        "**1. Select the Target Job Role** (Influences analysis and scoring)",
        options=job_role_options,
        key='selected_role'
    )

    uploaded_files = st.file_uploader(
        "**2. Upload Resumes** (PDF or DOCX)",
        type=["pdf", "docx"],
        accept_multiple_files=True
    )
    
    if st.button("🚀 Analyze All Uploaded Resumes"):
        if not uploaded_files:
            st.warning("Please upload one or more resume files to begin analysis.")
        else:
            total_files = len(uploaded_files)
            analyzed_count = 0  # files that were actually scored and stored
            progress_bar = st.progress(0)
            
            # Summary of this run only; replaces the summary of the previous run.
            st.session_state.individual_analysis = []
            
            with st.status("Processing Resumes...", expanded=True) as status_box:
                
                for position, file in enumerate(uploaded_files, start=1):
                    file_name = file.name
                    st.write(f"Analyzing **{file_name}**...")
                    
                    resume_text = extract_text_from_file(file)
                    
                    if not resume_text:
                        st.error(f"Could not extract text from {file_name}. Skipping.")
                    else:
                        analysis = analyze_resume_with_groq(resume_text, selected_role)
                        
                        if analysis.name == "Extraction Failed":
                            st.error(f"Groq extraction failed for {file_name}. Skipping.")
                        else:
                            score = calculate_resume_score(analysis)
                            append_analysis_to_dataframe(selected_role, analysis, score)
                            
                            st.session_state.individual_analysis.append({
                                'name': analysis.name,
                                'score': score,
                                'role': selected_role,
                                'file_name': file_name
                            })
                            analyzed_count += 1

                    # Advance for skipped files too, so the bar always reaches 100%.
                    progress_bar.progress(position / total_files)
                    
                status_box.update(
                    label="Analysis Complete!",
                    state="complete" if analyzed_count else "error",
                    expanded=False
                )
            
            # Report what really happened rather than the number of uploads.
            if analyzed_count:
                st.success(f"**✅ Successfully analyzed {analyzed_count} of {total_files} resumes.**")
            else:
                st.warning("None of the uploaded resumes could be analyzed. Expand the processing log above for details.")
            
    if 'individual_analysis' in st.session_state and st.session_state.individual_analysis:
        st.subheader("Last Analysis Summary")
        for item in st.session_state.individual_analysis:
            st.markdown(f"**{item['name']}** (for **{item['role']}**) - **Score: {item['score']}/100**")
            
        st.markdown("---")
        st.caption("All analyzed data is stored in the **Admin Dashboard**.")

# =========================================================================
# B. Admin Panel (Password Protected)
# =========================================================================
with tab_admin:

    # --- Login gate: nothing below is rendered until the password matches ---
    if not st.session_state.is_admin_logged_in:
        st.header("Admin Login")
        entered_password = st.text_input("Enter Admin Password", type="password")
        login_clicked = st.button("🔑 Login")
        if login_clicked and entered_password == ADMIN_PASSWORD:
            st.session_state.is_admin_logged_in = True
            st.rerun()
        elif login_clicked:
            st.error("Incorrect password.")
        # Halt the script here so the dashboard is never drawn for guests.
        st.stop()

    # --- Dashboard header and logout ---
    st.header("🎯 Recruitment Dashboard")
    st.markdown("---")

    logout_clicked = st.button("🚪 Logout")
    if logout_clicked:
        st.session_state.is_admin_logged_in = False
        st.rerun()

    if not st.session_state.analyzed_data.empty:
        # Work on a copy for display; edits are written back explicitly below.
        candidates = st.session_state.analyzed_data.copy()

        st.subheader("Candidate Data Table")
        st.success(f"**Total Candidates Analyzed: {len(candidates)}**")

        visible_columns = ['Name', 'Job Role', 'Resume Score (100)', 'Shortlisted', 'Email', 'Skills/Technologies']

        shortlist_column = st.column_config.SelectboxColumn(
            "Shortlisted",
            help="Mark the candidate as Shortlisted or Rejected.",
            options=["No", "Yes"],
            required=True,
        )
        edited_table = st.data_editor(
            candidates[visible_columns],
            column_config={"Shortlisted": shortlist_column},
            key="dashboard_editor",
            hide_index=True
        )

        # Persist the shortlist choices made in the editor.
        st.session_state.analyzed_data['Shortlisted'] = edited_table['Shortlisted']

        st.markdown("---")

        st.subheader("📥 Download Data")

        # Build the workbook in memory from the full table (all columns).
        export_frame = st.session_state.analyzed_data.copy()
        workbook_bytes = io.BytesIO()
        with pd.ExcelWriter(workbook_bytes, engine='openpyxl') as xlsx_writer:
            export_frame.to_excel(xlsx_writer, index=False, sheet_name='Resume Analysis Data')
        workbook_bytes.seek(0)  # rewind so the download starts at the first byte

        st.download_button(
            label="💾 Download All Data as Excel (.xlsx)",
            data=workbook_bytes,
            file_name="quantum_scrutiny_report.xlsx",
            mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
            help="Downloads the full table including all extracted fields and shortlist status."
        )
    else:
        st.warning("No resume data has been analyzed yet. Please upload files in the User Panel.")

# --- End of src/streamlit_app.py ---