meesamraza committed on
Commit
58b9e2b
·
verified ·
1 Parent(s): d68ff83

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +383 -46
app.py CHANGED
@@ -1,56 +1,393 @@
 
 
 
 
 
1
  import os
 
 
2
  from groq import Groq
3
- import streamlit as st
4
  from dotenv import load_dotenv
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
- # Load API key from .env file
7
- load_dotenv()
8
- api_key = os.getenv("GROQ_API_KEY")
9
-
10
- # Initialize the Groq client
11
- client = Groq(api_key=api_key)
12
-
13
- # Define the programming development topics for the chatbot
14
- developer_topics = [
15
- "best programming languages", "web development frameworks", "version control with Git",
16
- "debugging tips", "data structures and algorithms", "object-oriented programming",
17
- "functional programming", "software design patterns", "API design and development",
18
- "devops practices", "cloud computing", "front-end development", "back-end development",
19
- "machine learning", "deep learning", "software testing and QA", "agile methodologies",
20
- "CI/CD pipelines", "database design", "programming best practices", "security in development",
21
- "mobile app development", "project management for developers", "open source contribution",
22
- "developer tools and IDEs", "documentation and code commenting", "coding interview preparation"
23
- ]
24
-
25
- # Function to fetch chatbot completion from Groq API
26
- def get_response(query):
27
- completion = client.chat.completions.create(
28
- model="llama-3.3-70b-versatile",
29
- messages=[{"role": "user", "content": query}],
30
- temperature=0.7,
31
- max_completion_tokens=2024,
32
- top_p=1,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  )
34
- response = completion.choices[0].message.content
35
- return response
36
 
37
- def main():
38
- st.title("Programming Developer Advisor Chatbot")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
 
40
- # Let the user choose a developer-related topic or type a custom query
41
- topic = st.selectbox("Choose a programming topic", developer_topics)
42
- user_input = st.text_area("Or ask a programming-related question:", "")
43
 
44
- # If the user provides a query (not from audio), use that directly
45
- if user_input:
46
- query = user_input
47
- response = get_response(query)
48
- st.write("### Response:")
49
- st.write(response)
50
 
51
- # Handle unrelated queries
52
- if user_input and not any(topic in user_input.lower() for topic in developer_topics):
53
- st.write("Sorry, I can only answer programming-related questions.")
 
 
 
 
54
 
55
- if __name__ == "__main__":
56
- main()
 
1
# src/streamlit_app.py

import streamlit as st
import pandas as pd
import io
import os
import fitz  # PyMuPDF
import docx2txt
from groq import Groq
from dotenv import load_dotenv
from pydantic import BaseModel, Field

# --- 1. CONFIGURATION AND INITIALIZATION ---

# Load environment variables from the project root's .env file. The explicit
# path (one directory up from this file) keeps the lookup independent of the
# working directory the app is launched from.
load_dotenv(os.path.join(os.path.dirname(__file__), '..', '.env'))

GROQ_API_KEY = os.getenv("GROQ_API_KEY")

# Initialize the Groq client, halting the app with a clear message on failure.
if GROQ_API_KEY:
    try:
        groq_client = Groq(api_key=GROQ_API_KEY)
    except Exception as e:
        st.error(f"Error initializing Groq Client: {e}")
        st.stop()
else:
    st.error("GROQ_API_KEY not found. Please ensure the .env file is in the project root and contains your key.")
    st.stop()

# Admin password. FIX: read from the environment when provided so deployments
# can override it; falls back to the original hard-coded default for backward
# compatibility. NOTE(review): a plain-text password compared in code is not
# secure — consider st.secrets or a real auth layer.
ADMIN_PASSWORD = os.getenv("ADMIN_PASSWORD", "admin")

# Initialize session state: the admin-login flag and the accumulated table of
# analyzed resumes.
if 'is_admin_logged_in' not in st.session_state:
    st.session_state.is_admin_logged_in = False
if 'analyzed_data' not in st.session_state:
    # Empty DataFrame with the full column layout so the dashboard/display/
    # export code can rely on the schema before any resume is analyzed.
    initial_cols = [
        'Name', 'Job Role', 'Resume Score (100)', 'Email', 'Phone', 'Shortlisted',
        'Experience Summary', 'Education Summary', 'Communication Rating (1-10)',
        'Skills/Technologies', 'Certifications', 'ABA Skills (1-10)',
        'RBT/BCBA Cert', 'Autism-Care Exp (1-10)'
    ]
    st.session_state.analyzed_data = pd.DataFrame(columns=initial_cols)
48
+
49
+
50
+ # --- 2. DATA STRUCTURE FOR GROQ OUTPUT (Pydantic Schema) ---
51
 
52
class ResumeAnalysis(BaseModel):
    """Pydantic model for structured resume data extraction.

    The Field descriptions double as extraction instructions for the LLM
    (they are surfaced through the model's JSON schema). The last three
    fields are only meaningful when the target role is 'Therapist'.
    """
    name: str = Field(description="Full name of the candidate.")
    email: str = Field(description="Professional email address.")
    phone: str = Field(description="Primary phone number.")
    certifications: list[str] = Field(description="List of professional certifications (e.g., PMP, AWS Certified).")
    experience_summary: str = Field(description="A concise summary of the candidate's professional experience.")
    education_summary: str = Field(description="A concise summary of the candidate's highest education.")
    # Free-text on purpose: the scorer parses the leading number from
    # strings such as '7-High'.
    communication_skills: str = Field(description="A rating (1-10) or brief description of communication skills based on the resume language.")
    technical_skills: list[str] = Field(description="List of technical skills/technologies mentioned (e.g., Python, SQL, Docker).")
    # Therapist-only fields below.
    aba_therapy_skills: str = Field(description="Specific mention or score (1-10) for ABA Therapy skills, ONLY if the role is 'Therapist'.")
    rbt_bcba_certification: str = Field(description="Indicate 'Yes' or 'No' if RBT/BCBA certification is mentioned, ONLY if the role is 'Therapist'.")
    autism_care_experience_score: str = Field(description="A score (1-10) for Autism-Care Experience, ONLY if the role is 'Therapist'.")
65
+
66
+ # --- 3. HELPER FUNCTIONS ---
67
+
68
def extract_text_from_file(uploaded_file):
    """Return the plain text of an uploaded PDF or DOCX file.

    Unsupported file types yield an empty string. Extraction errors are
    reported via st.error and also yield an empty string (best-effort).
    """
    docx_mime = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
    try:
        if uploaded_file.type == "application/pdf":
            # PyMuPDF: concatenate the text of every page.
            with fitz.open(stream=uploaded_file.read(), filetype="pdf") as doc:
                return "".join(page.get_text() for page in doc)
        if uploaded_file.type == docx_mime:
            # docx2txt accepts the file-like object directly.
            return docx2txt.process(uploaded_file)
        return ""
    except Exception as e:
        st.error(f"Error extracting text: {e}")
        return ""
87
+
88
@st.cache_data(show_spinner="Analyzing resume with Groq...")
def analyze_resume_with_groq(resume_text: str, job_role: str) -> ResumeAnalysis:
    """Use Groq chat completions (JSON mode) to extract structured resume data.

    Returns a populated ResumeAnalysis on success, or a sentinel object with
    name == "Extraction Failed" when the API call or parsing fails.
    """

    # Extra instructions injected only for the Therapist role, which has three
    # specialized fields in the schema.
    therapist_instructions = ""
    if job_role == "Therapist":
        therapist_instructions = (
            "Because the job role is 'Therapist', you MUST carefully look for: "
            "1. ABA Therapy Skills, RBT/BCBA Certification, and Autism-Care Experience. "
            "2. Provide a score from 1-10 for the specialized fields: 'aba_therapy_skills' and 'autism_care_experience_score'. "
            "3. Set 'rbt_bcba_certification' to 'Yes' or 'No'."
        )

    # JSON mode does not accept a schema parameter, so the Pydantic JSON
    # schema is embedded in the system prompt instead.
    system_prompt = (
        f"You are a professional Resume Analyzer. Your task is to extract specific information from the provided resume text. "
        f"The candidate is applying for the role of '{job_role}'. "
        f"Follow the instructions precisely and return a JSON object that strictly adheres to the provided Pydantic schema. "
        f"For skills, provide a list of 5-10 most relevant items. {therapist_instructions} "
        f"JSON schema: {ResumeAnalysis.schema()}"
    )

    try:
        chat_completion = groq_client.chat.completions.create(
            # NOTE(review): verify mixtral-8x7b-32768 is still served by Groq.
            model="mixtral-8x7b-32768",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": f"Analyze the following resume text:\n\n---\n{resume_text}\n---"}
            ],
            # FIX: the Groq SDK parameter is `response_format`, not
            # `response_model`. The old unknown kwarg raised TypeError, so
            # every call fell straight into the failure branch below.
            response_format={"type": "json_object"},
            temperature=0.0
        )

        # The response content is a JSON string matching the schema above.
        analysis = ResumeAnalysis.parse_raw(chat_completion.choices[0].message.content)
        return analysis

    except Exception as e:
        st.error(f"Groq API Error: {e}")
        # Sentinel object signalling failure to the caller.
        return ResumeAnalysis(name="Extraction Failed", email="", phone="", certifications=[], experience_summary="", education_summary="", communication_skills="0", technical_skills=[], aba_therapy_skills="0", rbt_bcba_certification="No", autism_care_experience_score="0")
129
+
130
+
131
def calculate_resume_score(analysis: ResumeAnalysis) -> float:
    """Compute a weighted resume score out of 100.

    Weights: Experience 40%, Skills 30%, Communication 20%, Certifications
    10%, plus an up-to-10-point Therapist-specific bonus. The total is capped
    at 100 and rounded to a whole number (returned as float).
    """

    def _rating(text: str, default: float) -> float:
        """Parse the leading number from strings like '7-High'; clamp to 0-10."""
        try:
            value = float(text.split('-')[0].strip())
        except (ValueError, IndexError):
            return default  # Unparsable text from the LLM
        # FIX: clamp so a malformed value (e.g. '100') cannot blow up the
        # component weighting.
        return min(max(value, 0.0), 10.0)

    total_score = 0.0

    # 1. Experience (max 40 points). Heuristic: a longer summary suggests
    # more experience was found; 100 characters earns the full 40.
    exp_factor = min(len(analysis.experience_summary) / 100.0, 1.0)
    total_score += exp_factor * 40.0

    # 2. Skills (max 30 points): up to 10 relevant skills earn full credit.
    skills_factor = min(len(analysis.technical_skills) / 10.0, 1.0)
    total_score += skills_factor * 30.0

    # 3. Communication (max 20 points): scale the 1-10 LLM rating; default
    # to 5 when the text is unparsable.
    comm_rating = _rating(analysis.communication_skills, default=5.0)
    total_score += (comm_rating / 10.0) * 20.0

    # 4. Certifications (max 10 points): one point per certification.
    total_score += min(len(analysis.certifications), 10) * 1.0

    # Therapist-specific bonus (max 10 points, not 5 as a stale comment once
    # claimed): average of the two specialized 1-10 scores; non-numeric
    # scores simply earn no bonus.
    if st.session_state.get('selected_role') == "Therapist":
        try:
            aba_score = float(analysis.aba_therapy_skills.split('-')[0].strip())
            autism_score = float(analysis.autism_care_experience_score.split('-')[0].strip())
        except (ValueError, IndexError):
            pass
        else:
            aba_score = min(max(aba_score, 0.0), 10.0)
            autism_score = min(max(autism_score, 0.0), 10.0)
            total_score += ((aba_score + autism_score) / 20.0) * 10.0

    # Final cleanup and capping
    final_score = round(min(total_score, 100))
    return float(final_score)
184
+
185
+
186
def append_analysis_to_dataframe(job_role: str, analysis: ResumeAnalysis, score: float):
    """Format one analysis result and append it to the session-state DataFrame."""

    # Pydantic model -> plain dict of the extracted fields.
    # (FIX: dropped the dead writes of 'Job Role'/'Resume Score'/'Shortlisted'
    # into this dict — the row below is built explicitly.)
    data = analysis.dict()

    # Flatten list fields into comma-separated strings for display/Excel.
    technical_skills_list = ", ".join(data['technical_skills'])
    certifications_list = ", ".join(data['certifications'])

    # Build the new row. 'Shortlisted' starts as 'No'; the admin flips it in
    # the dashboard's data editor.
    df_data = {
        'Name': data['name'],
        'Job Role': job_role,
        'Resume Score (100)': score,
        'Email': data['email'],
        'Phone': data['phone'],
        'Shortlisted': 'No',
        'Experience Summary': data['experience_summary'],
        'Education Summary': data['education_summary'],
        'Communication Rating (1-10)': data['communication_skills'],
        'Skills/Technologies': technical_skills_list,
        'Certifications': certifications_list,
        'ABA Skills (1-10)': data['aba_therapy_skills'],
        'RBT/BCBA Cert': data['rbt_bcba_certification'],
        'Autism-Care Exp (1-10)': data['autism_care_experience_score'],
    }

    # Append as a single-row DataFrame via pd.concat (DataFrame.append was
    # removed in modern pandas).
    new_df = pd.DataFrame([df_data])
    st.session_state.analyzed_data = pd.concat([st.session_state.analyzed_data, new_df], ignore_index=True)
222
+
223
+
224
# --- 4. APP LAYOUT AND LOGIC ---

# NOTE(review): Streamlit requires st.set_page_config to be the FIRST
# Streamlit command of a run; the st.error/st.stop branches in the
# initialization section above may execute before it — confirm ordering.
st.set_page_config(layout="wide", page_title="Quantum Scrutiny Platform | Groq-Powered")

st.title("🌌 Quantum Scrutiny Platform: AI Resume Analysis")

# --- Tabs for User and Admin ---
# FIX: repaired mojibake-garbled emoji in the tab labels.
tab_user, tab_admin = st.tabs(["👤 Resume Uploader (User Panel)", "🔒 Admin Dashboard (Password Protected)"])
232
+
233
# =========================================================================
# A. Resume Upload (User Panel)
# =========================================================================
with tab_user:
    st.header("Upload Resumes for Analysis")
    st.info("Upload multiple PDF or DOCX files. The Groq AI engine will quickly extract and score the key data.")

    # Job Role Selection — keyed into session state because
    # calculate_resume_score reads 'selected_role' for the Therapist bonus.
    job_role_options = ["Software Engineer", "ML Engineer", "Therapist", "Data Analyst", "Project Manager"]
    selected_role = st.selectbox(
        "**1. Select the Target Job Role** (Influences analysis and scoring)",
        options=job_role_options,
        key='selected_role'
    )

    # File Uploader
    uploaded_files = st.file_uploader(
        "**2. Upload Resumes** (PDF or DOCX)",
        type=["pdf", "docx"],
        accept_multiple_files=True
    )

    if st.button("🚀 Analyze All Uploaded Resumes"):
        if not uploaded_files:
            st.warning("Please upload one or more resume files to begin analysis.")
        else:
            total_files = len(uploaded_files)
            progress_bar = st.progress(0)

            # Reset the per-batch summary shown below.
            st.session_state.individual_analysis = []
            analyzed_count = 0  # files that made it all the way through

            with st.status("Processing Resumes...", expanded=True) as status_box:

                for i, file in enumerate(uploaded_files):
                    file_name = file.name
                    st.write(f"Analyzing **{file_name}**...")

                    # FIX: advance the bar for every file, including skipped
                    # ones — previously `continue` bypassed the update and
                    # froze the progress bar.
                    progress_bar.progress((i + 1) / total_files)

                    # 1. Extract Text
                    resume_text = extract_text_from_file(file)
                    if not resume_text:
                        st.error(f"Could not extract text from {file_name}. Skipping.")
                        continue

                    # 2. Analyze with Groq
                    analysis = analyze_resume_with_groq(resume_text, selected_role)
                    if analysis.name == "Extraction Failed":
                        st.error(f"Groq extraction failed for {file_name}. Skipping.")
                        continue

                    # 3. Calculate Score
                    score = calculate_resume_score(analysis)

                    # 4. Store Data
                    append_analysis_to_dataframe(selected_role, analysis, score)
                    analyzed_count += 1

                    # Record for the per-batch summary below.
                    st.session_state.individual_analysis.append({
                        'name': analysis.name,
                        'score': score,
                        'role': selected_role,
                        'file_name': file_name
                    })

                status_box.update(label="Analysis Complete!", state="complete", expanded=False)

            # FIX: report the count actually analyzed, not merely uploaded.
            st.success(f"**✅ Successfully analyzed {analyzed_count} of {total_files} resumes.**")

    # Display results of the last batch of analysis
    if 'individual_analysis' in st.session_state and st.session_state.individual_analysis:
        st.subheader("Last Analysis Summary")
        for item in st.session_state.individual_analysis:
            st.markdown(f"**{item['name']}** (for **{item['role']}**) - **Score: {item['score']}/100**")

    st.markdown("---")
    st.caption("All analyzed data is stored in the **Admin Dashboard**.")
314
+
315
# =========================================================================
# B. Admin Panel (Password Protected)
# =========================================================================
with tab_admin:

    # --- Login gate: everything below only renders once authenticated. ---
    if not st.session_state.is_admin_logged_in:
        st.header("Admin Login")
        password = st.text_input("Enter Admin Password", type="password")
        if st.button("🔑 Login"):
            if password == ADMIN_PASSWORD:
                st.session_state.is_admin_logged_in = True
                st.rerun()
            else:
                st.error("Incorrect password.")
        st.stop()  # Halt this script run until logged in

    # --- Dashboard Content (Logged In) ---
    st.header("🎯 Recruitment Dashboard")
    st.markdown("---")

    if st.button("🚪 Logout"):
        st.session_state.is_admin_logged_in = False
        st.rerun()

    if st.session_state.analyzed_data.empty:
        st.warning("No resume data has been analyzed yet. Please upload files in the User Panel.")
    else:
        df = st.session_state.analyzed_data.copy()

        # --- 1. Shortlisting & Data Display ---
        st.subheader("Candidate Data Table")
        st.success(f"**Total Candidates Analyzed: {len(df)}**")

        # Key columns for display
        display_cols = ['Name', 'Job Role', 'Resume Score (100)', 'Shortlisted', 'Email', 'Skills/Technologies']

        # Editable table: the admin can flip the 'Shortlisted' status.
        edited_df = st.data_editor(
            df[display_cols],
            column_config={
                "Shortlisted": st.column_config.SelectboxColumn(
                    "Shortlisted",
                    help="Mark the candidate as Shortlisted or Rejected.",
                    options=["No", "Yes"],
                    required=True,
                )
            },
            key="dashboard_editor",
            hide_index=True
        )

        # Persist the edited shortlist status back into session state so the
        # change survives reruns (edited_df rows align by index with the
        # stored DataFrame).
        st.session_state.analyzed_data['Shortlisted'] = edited_df['Shortlisted']

        st.markdown("---")

        # --- 2. Download Excel File ---
        st.subheader("📥 Download Data")

        # Export the full table (all columns), not just the display subset.
        df_export = st.session_state.analyzed_data.copy()

        # Write the workbook into an in-memory buffer. FIX: seek(0) is placed
        # after the writer context closes — openpyxl only flushes the
        # workbook bytes on close, so rewinding earlier is meaningless.
        excel_buffer = io.BytesIO()
        with pd.ExcelWriter(excel_buffer, engine='openpyxl') as writer:
            df_export.to_excel(writer, index=False, sheet_name='Resume Analysis Data')
        excel_buffer.seek(0)

        st.download_button(
            label="💾 Download All Data as Excel (.xlsx)",
            data=excel_buffer,
            file_name="quantum_scrutiny_report.xlsx",
            mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
            help="Downloads the full table including all extracted fields and shortlist status."
        )
392
 
393
+ # --- End of src/streamlit_app.py ---