# Spaces: meesamraza / Programming_Developer_Advisor_Chatbot (Sleeping)
#
# File size: 16,978 Bytes

# src/streamlit_app.py

import hmac
import io
import json
import os
import re

import docx2txt
import fitz  # PyMuPDF
import pandas as pd
import streamlit as st
from dotenv import load_dotenv
from groq import Groq
from pydantic import BaseModel, Field

# --- 1. CONFIGURATION AND INITIALIZATION ---

# 🚨 FIX for .env: Load environment variables by explicitly pointing up one directory.
# This ensures the script finds the .env file even though it's run from the 'src' folder.
load_dotenv(os.path.join(os.path.dirname(__file__), '..', '.env')) 

GROQ_API_KEY = os.getenv("GROQ_API_KEY")

# Initialize Groq Client
if GROQ_API_KEY:
    try:
        groq_client = Groq(api_key=GROQ_API_KEY)
    except Exception as e:
        st.error(f"Error initializing Groq Client: {e}")
        st.stop()
else:
    # This message should no longer appear if the .env fix works
    st.error("GROQ_API_KEY not found. Please ensure the .env file is in the project root and contains your key.")
    st.stop()

# Admin Password (as requested)
ADMIN_PASSWORD = "admin"

# Initialize Session State
if 'is_admin_logged_in' not in st.session_state:
    st.session_state.is_admin_logged_in = False
if 'analyzed_data' not in st.session_state:
    # Define DataFrame with columns for initial structure
    initial_cols = [
        'Name', 'Job Role', 'Resume Score (100)', 'Email', 'Phone', 'Shortlisted',
        'Experience Summary', 'Education Summary', 'Communication Rating (1-10)', 
        'Skills/Technologies', 'Certifications', 'ABA Skills (1-10)', 
        'RBT/BCBA Cert', 'Autism-Care Exp (1-10)'
    ]
    st.session_state.analyzed_data = pd.DataFrame(columns=initial_cols)


# --- 2. DATA STRUCTURE FOR GROQ OUTPUT (Pydantic Schema) ---

class ResumeAnalysis(BaseModel):
    """Pydantic model for structured resume data extraction."""
    name: str = Field(description="Full name of the candidate.")
    email: str = Field(description="Professional email address.")
    phone: str = Field(description="Primary phone number.")
    certifications: list[str] = Field(description="List of professional certifications (e.g., PMP, AWS Certified).")
    experience_summary: str = Field(description="A concise summary of the candidate's professional experience.")
    education_summary: str = Field(description="A concise summary of the candidate's highest education.")
    communication_skills: str = Field(description="A rating (1-10) or brief description of communication skills based on the resume language.")
    technical_skills: list[str] = Field(description="List of technical skills/technologies mentioned (e.g., Python, SQL, Docker).")
    aba_therapy_skills: str = Field(description="Specific mention or score (1-10) for ABA Therapy skills, ONLY if the role is 'Therapist'.")
    rbt_bcba_certification: str = Field(description="Indicate 'Yes' or 'No' if RBT/BCBA certification is mentioned, ONLY if the role is 'Therapist'.")
    autism_care_experience_score: str = Field(description="A score (1-10) for Autism-Care Experience, ONLY if the role is 'Therapist'.")

# --- 3. HELPER FUNCTIONS ---

def extract_text_from_file(uploaded_file):
    """Extracts text from PDF or DOCX files."""
    file_type = uploaded_file.type
    try:
        if file_type == "application/pdf":
            # Use PyMuPDF for PDF
            with fitz.open(stream=uploaded_file.read(), filetype="pdf") as doc:
                text = ""
                for page in doc:
                    text += page.get_text()
            return text
        elif file_type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
            # Use docx2txt for DOCX
            return docx2txt.process(uploaded_file)
        else:
            return ""
    except Exception as e:
        st.error(f"Error extracting text: {e}")
        return ""

@st.cache_data(show_spinner="Analyzing resume with Groq...")
def analyze_resume_with_groq(resume_text: str, job_role: str) -> ResumeAnalysis:
    """Uses Groq and the Pydantic schema for structured extraction."""
    
    # Custom instructions for Therapist role
    therapist_instructions = ""
    if job_role == "Therapist":
        therapist_instructions = (
            "Because the job role is 'Therapist', you MUST carefully look for: "
            "1. ABA Therapy Skills, RBT/BCBA Certification, and Autism-Care Experience. "
            "2. Provide a score from 1-10 for the specialized fields: 'aba_therapy_skills' and 'autism_care_experience_score'. "
            "3. Set 'rbt_bcba_certification' to 'Yes' or 'No'."
        )

    # System Prompt for Groq
    system_prompt = (
        f"You are a professional Resume Analyzer. Your task is to extract specific information from the provided resume text. "
        f"The candidate is applying for the role of '{job_role}'. "
        f"Follow the instructions precisely and return a JSON object that strictly adheres to the provided Pydantic schema. "
        f"For skills, provide a list of 5-10 most relevant items. {therapist_instructions}"
    )

    try:
        chat_completion = groq_client.chat.completions.create(
            model="mixtral-8x7b-32768", # Fast model suitable for this task
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": f"Analyze the following resume text:\n\n---\n{resume_text}\n---"}
            ],
            response_model={"type": "json_object", "schema": ResumeAnalysis.schema()},
            temperature=0.0
        )
        
        # The response is a JSON string, which we can parse into the Pydantic model
        analysis = ResumeAnalysis.parse_raw(chat_completion.choices[0].message.content)
        return analysis
        
    except Exception as e:
        st.error(f"Groq API Error: {e}")
        # Return an empty/default analysis object on failure
        return ResumeAnalysis(name="Extraction Failed", email="", phone="", certifications=[], experience_summary="", education_summary="", communication_skills="0", technical_skills=[], aba_therapy_skills="0", rbt_bcba_certification="No", autism_care_experience_score="0")


def calculate_resume_score(analysis: ResumeAnalysis) -> float:
    """Calculates the weighted score out of 100."""
    
    # Weights for maximum possible score contribution:
    # Experience (40%), Skills (30%), Communication (20%), Certifications (10%)
    
    total_score = 0.0

    # 1. Experience Score (Max 40 points)
    # Simple heuristic: longer summary means more experience found.
    # Max score is 40.
    exp_factor = min(len(analysis.experience_summary) / 100.0, 1.0) # Use 100 chars as the max point
    total_score += exp_factor * 40.0

    # 2. Skills Score (Max 30 points)
    # Based on number of skills found (up to 10 relevant skills)
    # Max score is 30.
    skills_factor = min(len(analysis.technical_skills) / 10.0, 1.0)
    total_score += skills_factor * 30.0

    # 3. Communication Score (Max 20 points)
    # Assuming 'communication_skills' is a score string '1-10' from Groq
    try:
        # Tries to extract the first number from the string (e.g., '7-High' -> 7)
        comm_rating = float(analysis.communication_skills.split('-')[0].strip())
    except (ValueError, IndexError):
        comm_rating = 5.0 # Default if Groq returns unparsable text
        
    score_comm = (comm_rating / 10.0) * 20.0 # Scale 1-10 rating to max 20 points
    total_score += score_comm

    # 4. Certification Score (Max 10 points)
    # Each certification adds a point, max 10 certs.
    score_cert = min(len(analysis.certifications), 10) * 1.0 
    total_score += score_cert

    # --- Therapist-Specific Bonus Checks ---
    if st.session_state.get('selected_role') == "Therapist":
        # Additional points based on specialized scores (e.g., up to 5 points bonus)
        try:
            aba_score = float(analysis.aba_therapy_skills.split('-')[0].strip())
            autism_score = float(analysis.autism_care_experience_score.split('-')[0].strip())
            
            # Add a bonus based on the average specialized scores (max 10 points)
            specialized_bonus = ((aba_score + autism_score) / 20.0) * 10.0
            total_score += specialized_bonus
        except (ValueError, IndexError):
            pass # Ignore if specialized scores are not numbers

    
    # Final cleanup and capping
    final_score = round(min(total_score, 100))
    return float(final_score)


def append_analysis_to_dataframe(job_role: str, analysis: ResumeAnalysis, score: float):
    """Formats and appends the new analysis to the session state DataFrame."""
    
    # Convert Pydantic model to dictionary
    data = analysis.dict()
    
    # Add computed and derived fields
    data['Job Role'] = job_role
    data['Resume Score'] = score
    data['Shortlisted'] = 'No' # Default status
    
    # Clean up list fields for display/Excel
    technical_skills_list = ", ".join(data['technical_skills'])
    certifications_list = ", ".join(data['certifications'])
    
    # The new row data
    df_data = {
        'Name': data['name'],
        'Job Role': job_role,
        'Resume Score (100)': score,
        'Email': data['email'],
        'Phone': data['phone'],
        'Shortlisted': data['Shortlisted'],
        'Experience Summary': data['experience_summary'],
        'Education Summary': data['education_summary'],
        'Communication Rating (1-10)': data['communication_skills'],
        'Skills/Technologies': technical_skills_list,
        'Certifications': certifications_list,
        'ABA Skills (1-10)': data['aba_therapy_skills'],
        'RBT/BCBA Cert': data['rbt_bcba_certification'],
        'Autism-Care Exp (1-10)': data['autism_care_experience_score'],
    }

    # Convert to a single-row DataFrame and concatenate
    new_df = pd.DataFrame([df_data])
    st.session_state.analyzed_data = pd.concat([st.session_state.analyzed_data, new_df], ignore_index=True)


# --- 4. APP LAYOUT AND LOGIC ---

st.set_page_config(layout="wide", page_title="Quantum Scrutiny Platform | Groq-Powered")

st.title("🌌 Quantum Scrutiny Platform: AI Resume Analysis")

# --- Tabs for User and Admin ---
tab_user, tab_admin = st.tabs(["👤 Resume Uploader (User Panel)", "🔒 Admin Dashboard (Password Protected)"])

# =========================================================================
# A. Resume Upload (User Panel)
# =========================================================================
with tab_user:
    st.header("Upload Resumes for Analysis")
    st.info("Upload multiple PDF or DOCX files. The Groq AI engine will quickly extract and score the key data.")
    
    # Job Role Selection
    job_role_options = ["Software Engineer", "ML Engineer", "Therapist", "Data Analyst", "Project Manager"]
    selected_role = st.selectbox(
        "**1. Select the Target Job Role** (Influences analysis and scoring)",
        options=job_role_options,
        key='selected_role' # Store role in session state for scoring logic
    )

    # File Uploader
    uploaded_files = st.file_uploader(
        "**2. Upload Resumes** (PDF or DOCX)",
        type=["pdf", "docx"],
        accept_multiple_files=True
    )
    
    if st.button("🚀 Analyze All Uploaded Resumes"):
        if not uploaded_files:
            st.warning("Please upload one or more resume files to begin analysis.")
        else:
            total_files = len(uploaded_files)
            progress_bar = st.progress(0)
            
            # Clear previous individual file analysis displays
            st.session_state.individual_analysis = []
            
            with st.status("Processing Resumes...", expanded=True) as status_box:
                
                for i, file in enumerate(uploaded_files):
                    file_name = file.name
                    st.write(f"Analyzing **{file_name}**...")
                    
                    # 1. Extract Text
                    resume_text = extract_text_from_file(file)
                    
                    if not resume_text:
                        st.error(f"Could not extract text from {file_name}. Skipping.")
                        continue
                    
                    # 2. Analyze with Groq
                    analysis = analyze_resume_with_groq(resume_text, selected_role)
                    
                    if analysis.name == "Extraction Failed":
                         st.error(f"Groq extraction failed for {file_name}. Skipping.")
                         continue
                        
                    # 3. Calculate Score
                    score = calculate_resume_score(analysis)
                    
                    # 4. Store Data
                    append_analysis_to_dataframe(selected_role, analysis, score)
                    
                    # Store data for individual display below
                    st.session_state.individual_analysis.append({
                        'name': analysis.name,
                        'score': score,
                        'role': selected_role,
                        'file_name': file_name
                    })

                    # Update progress
                    progress_bar.progress((i + 1) / total_files)
                    
                status_box.update(label="Analysis Complete!", state="complete", expanded=False)
            
            st.success(f"**✅ Successfully analyzed {total_files} resumes.**")
            
    # Display results of the last batch of analysis
    if 'individual_analysis' in st.session_state and st.session_state.individual_analysis:
        st.subheader("Last Analysis Summary")
        for item in st.session_state.individual_analysis:
            st.markdown(f"**{item['name']}** (for **{item['role']}**) - **Score: {item['score']}/100**")
            
        st.markdown("---")
        st.caption("All analyzed data is stored in the **Admin Dashboard**.")

# =========================================================================
# B. Admin Panel (Password Protected)
# =========================================================================
with tab_admin:
    
    # --- Login Logic ---
    if not st.session_state.is_admin_logged_in:
        st.header("Admin Login")
        password = st.text_input("Enter Admin Password", type="password")
        if st.button("🔑 Login"):
            if password == ADMIN_PASSWORD:
                st.session_state.is_admin_logged_in = True
                st.rerun()
            else:
                st.error("Incorrect password.")
        st.stop() # Stop execution until logged in
    
    # --- Dashboard Content (Logged In) ---
    st.header("🎯 Recruitment Dashboard")
    st.markdown("---")
    
    if st.button("🚪 Logout"):
        st.session_state.is_admin_logged_in = False
        st.rerun()

    if st.session_state.analyzed_data.empty:
        st.warning("No resume data has been analyzed yet. Please upload files in the User Panel.")
    else:
        df = st.session_state.analyzed_data.copy()
        
        # --- 1. Shortlisting & Data Display ---
        st.subheader("Candidate Data Table")
        st.success(f"**Total Candidates Analyzed: {len(df)}**")

        # Key columns for display
        display_cols = ['Name', 'Job Role', 'Resume Score (100)', 'Shortlisted', 'Email', 'Skills/Technologies']
        
        # Editable Data Table (allowing admin to change 'Shortlisted' status)
        edited_df = st.data_editor(
            df[display_cols],
            column_config={
                "Shortlisted": st.column_config.SelectboxColumn(
                    "Shortlisted",
                    help="Mark the candidate as Shortlisted or Rejected.",
                    options=["No", "Yes"],
                    required=True,
                )
            },
            key="dashboard_editor",
            hide_index=True
        )
        
        # Update the session state DataFrame with the edited shortlisting status
        # This keeps the changes persistent
        st.session_state.analyzed_data['Shortlisted'] = edited_df['Shortlisted']

        st.markdown("---")

        # --- 2. Download Excel File ---
        st.subheader("📥 Download Data")

        # The full DataFrame to export
        df_export = st.session_state.analyzed_data.copy()

        # Create an in-memory Excel file buffer
        excel_buffer = io.BytesIO()
        with pd.ExcelWriter(excel_buffer, engine='openpyxl') as writer:
            df_export.to_excel(writer, index=False, sheet_name='Resume Analysis Data')
        excel_buffer.seek(0)

        st.download_button(
            label="💾 Download All Data as Excel (.xlsx)",
            data=excel_buffer,
            file_name="quantum_scrutiny_report.xlsx",
            mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
            help="Downloads the full table including all extracted fields and shortlist status."
        )

# --- End of src/streamlit_app.py ---