File size: 58,004 Bytes
bae2115
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ad78c34
bae2115
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1eabbf4
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
# app.py (Company Portal)
import os
import pandas as  pd
import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
import json
import io           
import numpy as np
import hashlib
from datetime import datetime

year = datetime.now().year
import re           
from flask import Flask, render_template, request, redirect, url_for, session, jsonify, send_file, flash, make_response
import google.generativeai as genai
from datetime import datetime
from supabase import create_client, Client
from dotenv import load_dotenv

load_dotenv()

app = Flask(__name__)
# Falls back to a random per-process key: sessions then invalidate on restart.
app.secret_key = os.getenv("COMPANY_PORTAL_SECRET_KEY", os.urandom(24))

# --- Supabase Client Initialization ---
# After this block `supabase` is either a usable Client or None; the data
# helpers below check it for None before querying.
supabase_url = os.getenv("SUPABASE_URL")
supabase_key = os.getenv("SUPABASE_KEY") 

if not supabase_url or not supabase_key:
    print("CRITICAL ERROR: SUPABASE_URL and SUPABASE_KEY environment variables are required.")
    supabase: Client = None
else:
    try:
        supabase: Client = create_client(supabase_url, supabase_key)
        print("Supabase client initialized successfully for Company Portal.")
    except Exception as e:
        print(f"CRITICAL ERROR: Error initializing Supabase client for Company Portal: {e}")
        supabase = None

# --- Gemini API Configuration ---
# HAS_GEMINI gates the AI features; it stays False when the key is missing
# or genai.configure() raises, so those features degrade gracefully.
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
HAS_GEMINI = False
if GEMINI_API_KEY:
    try:
        genai.configure(api_key=GEMINI_API_KEY)
        HAS_GEMINI = True
        print("Gemini API Configured.")
    except Exception as e:
        print(f"Warning: Error configuring Gemini API: {e}. AI Insights & NL Query may fail.")
else:
    print("Warning: GEMINI_API_KEY not found in .env file. AI Insights & NL Query will not work.")

# --- Helper Functions ---

def verify_password(stored_hash, password):
    """Verify a plain-text password against a stored SHA-256 hex digest.

    Args:
        stored_hash: Hex-encoded SHA-256 digest previously stored for the user.
        password: Plain-text password supplied at login.

    Returns:
        True when the password hashes to ``stored_hash``, else False.

    NOTE(review): unsalted single-round SHA-256 is weak for password storage;
    migrating to a real KDF (bcrypt/scrypt/argon2) requires rehashing stored
    credentials, so it is flagged here rather than changed.
    """
    import hmac  # local import: keeps the module's import block untouched

    input_hash = hashlib.sha256(password.encode()).hexdigest()
    # compare_digest is constant-time, avoiding a timing side channel that
    # `==` on hex digests would leak.
    return hmac.compare_digest(input_hash, stored_hash)

def load_student_data():
    """Load all candidate records from Supabase into a pandas DataFrame.

    NOTE(review): despite the name, this queries the 'companies' table —
    confirm whether 'students' was intended; the log lines below talk about
    student records.

    Returns:
        DataFrame with 'cgpa'/'backlogs'/'typing_speed' coerced to numeric
        ('backlogs' additionally NaN-filled to int), or an empty DataFrame
        when the client is unavailable, the table is empty, or the query
        raises (errors are flashed/printed, never raised to the caller).
    """
    if not supabase:
        flash("Database connection error. Cannot load student data.", "danger")
        return pd.DataFrame()

    try:
        response = supabase.from_('companies').select('*').execute()
        if response.data:
            df = pd.DataFrame(response.data)
            numeric_cols = ['cgpa', 'backlogs', 'typing_speed']
            for col in numeric_cols:
                if col in df.columns:
                    # Coerce errors to NaN, then handle NaN later if needed
                    df[col] = pd.to_numeric(df[col], errors='coerce')
            if 'backlogs' in df.columns:
                # Fill NaN with 0 before converting to int
                df['backlogs'] = df['backlogs'].fillna(0).astype(int)
            # Example of how you might handle array columns if stored as strings (adjust based on DB schema)
            # list_cols = ['programming_languages', 'tools_technologies', 'soft_skills', 'preferred_roles', 'certifications']
            # for col in list_cols:
            #     if col in df.columns and df[col].dtype == 'object':
            #         # Attempt to parse comma-separated string or already list-like structure
            #         df[col] = df[col].apply(lambda x: [i.strip() for i in str(x).split(',')] if pd.notna(x) and isinstance(x, str) else x)

            print(f"Loaded {len(df)} student records.") # Debug
            return df
        else:
            print("No student data found in Supabase.") # Debug
            return pd.DataFrame()
    except Exception as e:
        print(f"Error loading student data from Supabase: {e}")
        flash(f"Error loading student data: {e}", "danger")
        return pd.DataFrame()


def send_job_email(job_data):
    """Email a formatted HTML job posting to the (hard-coded) recipient.

    Args:
        job_data: Dict with keys 'title', 'location', 'required_skills'
            (iterable of str), 'min_cgpa', 'experience_level' and
            'description' — all interpolated into the HTML body.

    Returns:
        True on success, False on any failure (errors are printed with a
        traceback, never raised to the caller).

    SMTP settings come from the SMTP_SERVER / SMTP_PORT / SMTP_USERNAME /
    SMTP_PASSWORD environment variables; all four must be set. The footer
    uses the module-level ``year`` captured at import time.
    """
    try:
        # Email configuration from environment variables
        smtp_server = os.getenv("SMTP_SERVER")
        smtp_port = int(os.getenv("SMTP_PORT", 0))
        smtp_username = os.getenv("SMTP_USERNAME")
        smtp_password = os.getenv("SMTP_PASSWORD")
        if not all([smtp_server, smtp_port, smtp_username, smtp_password]):
            raise RuntimeError("SMTP credentials are missing or invalid in .env")
        # Recipient email (fixed — every posting goes to this address)
        recipient_email ="pranit.chilbule221@vit.edu"

        # Create message
        msg = MIMEMultipart()
        msg['From'] = smtp_username
        msg['To'] = recipient_email
        msg['Subject'] = f"New Job Posting: {job_data['title']}"

        # Email body (table-based inline-styled HTML for email-client support;
        # doubled braces {{ }} escape the literal CSS braces in this f-string)
        body = f"""
        <html lang="en">
        <head>
          <meta charset="UTF-8">
          <title>New Job Posting</title>
        </head>
        <body style="margin:0;padding:0;background-color:#f4f4f4;font-family:Arial,sans-serif;">
          <!--[if mso]>
            <style>
              body, table, td {{
                font-family: Arial, sans-serif !important;
              }}
            </style>
          <![endif]-->

          <table width="100%" cellpadding="0" cellspacing="0" role="presentation">
            <tr>
              <td align="center" style="padding:20px 0;">
                <table width="600" cellpadding="0" cellspacing="0" role="presentation"
                       style="background-color:#ffffff;border-radius:8px;overflow:hidden;
                              box-shadow:0 2px 6px rgba(0,0,0,0.1);">

                  <!-- Header -->
                  <tr>
                    <td style="background-color:#004aad;padding:20px;text-align:center;color:#ffffff;">
                      <h1 style="margin:0;font-size:24px;line-height:28px;">New Job Posting</h1>
                    </td>
                  </tr>

                  <!-- Body content -->
                  <tr>
                    <td style="padding:30px;">
                      <p style="margin:0 0 16px;font-size:16px;line-height:22px;">
                        Hello Team,
                      </p>

                      <table width="100%" cellpadding="0" cellspacing="0" role="presentation"
                             style="margin-bottom:24px;">
                        <tr>
                          <td style="padding:8px 0;font-weight:bold;width:30%;">Title:</td>
                          <td style="padding:8px 0;">{job_data['title']}</td>
                        </tr>
                        <tr style="background-color:#f9f9f9;">
                          <td style="padding:8px 0;font-weight:bold;">Location:</td>
                          <td style="padding:8px 0;">{job_data['location']}</td>
                        </tr>
                        <tr>
                          <td style="padding:8px 0;font-weight:bold;">Required Skills:</td>
                          <td style="padding:8px 0;">{', '.join(job_data['required_skills'])}</td>
                        </tr>
                        <tr style="background-color:#f9f9f9;">
                          <td style="padding:8px 0;font-weight:bold;">Minimum CGPA:</td>
                          <td style="padding:8px 0;">
                            {job_data['min_cgpa'] if job_data['min_cgpa'] else 'Not specified'}
                          </td>
                        </tr>
                        <tr>
                          <td style="padding:8px 0;font-weight:bold;">Experience Level:</td>
                          <td style="padding:8px 0;">{job_data['experience_level']} year(s)</td>
                        </tr>
                      </table>

                      <p style="margin:0 0 8px;font-weight:bold;font-size:16px;">Description:</p>
                      <p style="margin:0 0 24px;font-size:15px;line-height:21px;">
                        {job_data['description']}
                      </p>
                        <a href="https://docs.google.com/forms/d/e/1FAIpQLSdcm0zkEqZjb1sDBaytj9ITLOX8L0LTD81HczJxRAS7LfvSKA/viewform?usp=header" target="_blank">
  <button style="padding: 10px 20px; font-size: 16px; border-radius: 8px; background-color: #4CAF50; color: white; border: none; cursor: pointer;">
    Fill Out the Form
  </button>
</a>

                      <p style="margin:0;font-size:14px;color:#777777;line-height:20px;">
                        This is an automated message from the Company Portal.
                      </p>
                    </td>
                  </tr>

                  <!-- Footer -->
                  <tr>
                    <td style="background-color:#f4f4f4;padding:15px;
                               text-align:center;font-size:12px;color:#999999;">
                      &copy; {year} Your Company Name. All rights reserved.
                    </td>
                  </tr>

                </table>
              </td>
            </tr>
          </table>
        </body>
        </html>
        """

        msg.attach(MIMEText(body, 'html'))

        # Connect to SMTP server and send
        with smtplib.SMTP(smtp_server, smtp_port) as server:
               server.ehlo()  # Identify ourselves to SMTP
               server.starttls()  # Upgrade to secure TLS
               server.ehlo()  # Re-identify after STARTTLS
               server.login(smtp_username, smtp_password)
               server.send_message(msg)

        print(f"Email sent successfully to {recipient_email}")
        return True
    except Exception as e:
        print(f"Error sending email: {e}")
        import traceback
        traceback.print_exc()
        return False


def get_unique_values(df, column):
    """Collect the distinct, cleaned values appearing in *column* of *df*.

    Cells may be Python lists (DB array columns) or comma-separated strings.
    Proficiency suffixes such as " (Beginner)" are stripped from each value.
    Returns an alphabetically sorted list; empty when the column is missing
    or entirely null.
    """
    found = set()
    if column not in df.columns or df[column].isnull().all():
        return sorted(found)

    for cell in df[column].dropna():
        cell_text = str(cell).strip()
        if not cell_text:
            continue
        if isinstance(cell, list):
            # Array column: clean each element individually.
            for element in cell:
                if element and str(element).strip():
                    found.add(str(element).split('(')[0].strip())
        elif ',' in cell_text:
            # Comma-separated string: split, then drop proficiency tags.
            for part in cell_text.split(','):
                if part.strip():
                    found.add(part.split('(')[0].strip())
        else:
            # Single value: drop the proficiency tag if present.
            found.add(cell_text.split('(')[0].strip())
    return sorted(found)


def filter_students(df, filters):
    """Filter the student DataFrame by form criteria and optional job requirements.

    Filters are applied sequentially, each one shrinking ``filtered_df``;
    every step logs its before/after row counts for debugging.

    Args:
        df: Student DataFrame as produced by load_student_data().
        filters: Dict that may contain 'min_cgpa', 'max_backlogs',
            'department', 'skills' (list), 'roles' (list), 'certifications'
            (list), boolean flags 'has_hackathons' / 'has_experience' /
            'has_research', and 'job_id' which pulls extra requirements
            from the Supabase 'jobs' table.

    Returns:
        The filtered copy of *df*. On an unexpected top-level error the
        ORIGINAL *df* is returned after flashing a message.
    """
    if df.empty:
        print("Filter Students: Input DataFrame is empty.")
        return df

    filtered_df = df.copy()
    print(f"Filter Students: Starting with {len(filtered_df)} students. Filters: {filters}")

    try:
        # CGPA filter
        if filters.get('min_cgpa') is not None and filters['min_cgpa'] != '': # Check for not None explicitly
            try:
                min_cgpa_val = float(filters['min_cgpa'])
                if 'cgpa' in filtered_df.columns:
                    original_count = len(filtered_df)
                    # NaN CGPAs are treated as 0, so they never pass a positive threshold
                    filtered_df = filtered_df[filtered_df['cgpa'].fillna(0) >= min_cgpa_val]
                    print(f"  Applied CGPA >= {min_cgpa_val}: {original_count} -> {len(filtered_df)}")
            except (ValueError, TypeError):
                print(f"  Warning: Invalid CGPA filter value '{filters['min_cgpa']}'")

        # Backlogs filter
        if filters.get('max_backlogs') is not None and filters['max_backlogs'] != '': # Check for not None explicitly
            try:
                 max_backlogs_val = float(filters['max_backlogs'])
                 if 'backlogs' in filtered_df.columns:
                     original_count = len(filtered_df)
                     filtered_df = filtered_df[filtered_df['backlogs'] <= max_backlogs_val]
                     print(f"  Applied Backlogs <= {max_backlogs_val}: {original_count} -> {len(filtered_df)}")
            except (ValueError, TypeError):
                 print(f"  Warning: Invalid Backlogs filter value '{filters['max_backlogs']}'")

        # Department filter (added for NL Query possibility)
        if filters.get('department') and 'department' in filtered_df.columns:
            dept_filter = str(filters['department']).strip()
            if dept_filter:
                original_count = len(filtered_df)
                # Case-insensitive comparison
                filtered_df = filtered_df[filtered_df['department'].str.contains(dept_filter, case=False, na=False)]
                print(f"  Applied Department contains '{dept_filter}': {original_count} -> {len(filtered_df)}")


        # Helper for text contains filtering (case-insensitive) for comma-separated strings or lists.
        # Returns a boolean Series: True where the cell contains ANY of `patterns`
        # after lowercasing and stripping proficiency suffixes like " (Beginner)".
        def text_contains_any(series, patterns):
            if not patterns or series.isnull().all():
                return pd.Series([False] * len(series), index=series.index)

            patterns_set = {str(p).lower().strip() for p in patterns if p and str(p).strip()}
            if not patterns_set:
                return pd.Series([False] * len(series), index=series.index)

            def check_item(item):
                if pd.isna(item): return False
                item_elements = []
                current_item_elements = []
                if isinstance(item, list):
                    current_item_elements = [str(el).lower().split('(')[0].strip() for el in item if el]
                elif isinstance(item, str):
                    current_item_elements = [el.lower().split('(')[0].strip() for el in item.split(',') if el.strip()]
                else:
                    current_item_elements = [str(item).lower().split('(')[0].strip()]
                # Extend the main list
                item_elements.extend(el for el in current_item_elements if el) # Ensure no empty strings
                return any(elem in patterns_set for elem in item_elements)

            return series.apply(check_item)


        # Skills filter (checks programming_languages OR tools_technologies)
        if filters.get('skills'):
            skill_patterns = filters['skills']
            lang_col_exists = 'programming_languages' in filtered_df.columns
            tools_col_exists = 'tools_technologies' in filtered_df.columns
            lang_match = pd.Series([False] * len(filtered_df), index=filtered_df.index)
            tools_match = pd.Series([False] * len(filtered_df), index=filtered_df.index)

            if lang_col_exists: lang_match = text_contains_any(filtered_df['programming_languages'], skill_patterns)
            if tools_col_exists: tools_match = text_contains_any(filtered_df['tools_technologies'], skill_patterns)

            original_count = len(filtered_df)
            filtered_df = filtered_df[lang_match | tools_match]
            print(f"  Applied Skills filter ({skill_patterns}): {original_count} -> {len(filtered_df)}")


        # Preferred roles filter
        if filters.get('roles'):
            role_patterns = filters['roles']
            if 'preferred_roles' in filtered_df.columns:
                original_count = len(filtered_df)
                filtered_df = filtered_df[text_contains_any(filtered_df['preferred_roles'], role_patterns)]
                print(f"  Applied Roles filter ({role_patterns}): {original_count} -> {len(filtered_df)}")

        # Certifications filter (checks certifications OR detailed_certifications)
        if filters.get('certifications'):
            cert_patterns = filters['certifications']
            cert_match = pd.Series([False] * len(filtered_df), index=filtered_df.index)
            detail_cert_match = pd.Series([False] * len(filtered_df), index=filtered_df.index)

            if 'certifications' in filtered_df.columns: cert_match = text_contains_any(filtered_df['certifications'], cert_patterns)
            if 'detailed_certifications' in filtered_df.columns: detail_cert_match = text_contains_any(filtered_df['detailed_certifications'], cert_patterns)

            original_count = len(filtered_df)
            filtered_df = filtered_df[cert_match | detail_cert_match]
            print(f"  Applied Certifications filter ({cert_patterns}): {original_count} -> {len(filtered_df)}")


        # Boolean flags (check if column exists and is not null/empty string/empty list)
        def check_not_empty(series):
            if series is None or series.empty: return pd.Series([False] * len(filtered_df), index=filtered_df.index)
            # Handles NaN, None, empty strings, and potentially empty lists robustly
            return series.apply(lambda x: pd.notna(x) and ( (isinstance(x, str) and x.strip()!='') or (isinstance(x, list) and len(x)>0) or (not isinstance(x,(str,list))) ) )

        # Use the boolean value directly from filters dict (already processed in filter_students_route)
        if filters.get('has_hackathons') is True:
             if 'hackathons' in filtered_df.columns:
                original_count = len(filtered_df)
                filtered_df = filtered_df[check_not_empty(filtered_df['hackathons'])]
                print(f"  Applied Has Hackathons: {original_count} -> {len(filtered_df)}")
             else: print("  Skipped Has Hackathons: Column not found")

        if filters.get('has_experience') is True:
            proj_exists = 'projects' in filtered_df.columns
            intern_exists = 'internships' in filtered_df.columns
            has_proj = check_not_empty(filtered_df.get('projects')) if proj_exists else pd.Series([False] * len(filtered_df), index=filtered_df.index)
            has_intern = check_not_empty(filtered_df.get('internships')) if intern_exists else pd.Series([False] * len(filtered_df), index=filtered_df.index)
            original_count = len(filtered_df)
            filtered_df = filtered_df[has_proj | has_intern]
            print(f"  Applied Has Experience (Proj/Intern): {original_count} -> {len(filtered_df)}")


        if filters.get('has_research') is True:
            pub_exists = 'publications' in filtered_df.columns
            patent_exists = 'patents' in filtered_df.columns
            has_pub = check_not_empty(filtered_df.get('publications')) if pub_exists else pd.Series([False] * len(filtered_df), index=filtered_df.index)
            has_patent = check_not_empty(filtered_df.get('patents')) if patent_exists else pd.Series([False] * len(filtered_df), index=filtered_df.index)
            original_count = len(filtered_df)
            filtered_df = filtered_df[has_pub | has_patent]
            print(f"  Applied Has Research (Pub/Patent): {original_count} -> {len(filtered_df)}")


        # --- Job role-based filtering (Applied if job_id is present) ---
        job_id_filter = filters.get('job_id')
        if job_id_filter and job_id_filter != '' and supabase:
            try:
                job_id_val = int(job_id_filter)
                print(f"  Applying filters for Job ID: {job_id_val}")
                response = supabase.table('jobs').select('min_cgpa, required_skills, experience_level').eq('id', job_id_val).maybe_single().execute()

                if response.data:
                    job = response.data
                    print(f"    Job Requirements: {job}")
                    # Filter by minimum CGPA requirement
                    if job.get('min_cgpa') is not None and 'cgpa' in filtered_df.columns:
                        original_count = len(filtered_df)
                        filtered_df = filtered_df[filtered_df['cgpa'].fillna(0) >= float(job['min_cgpa'])]
                        print(f"    Applied Job CGPA >= {job['min_cgpa']}: {original_count} -> {len(filtered_df)}")

                    # Filter by required skills (ANY logic)
                    if job.get('required_skills'):
                        required_skills_list = job['required_skills']
                        lang_col_exists = 'programming_languages' in filtered_df.columns
                        tools_col_exists = 'tools_technologies' in filtered_df.columns
                        lang_match = pd.Series([False] * len(filtered_df), index=filtered_df.index)
                        tools_match = pd.Series([False] * len(filtered_df), index=filtered_df.index)

                        if lang_col_exists: lang_match = text_contains_any(filtered_df['programming_languages'], required_skills_list)
                        if tools_col_exists: tools_match = text_contains_any(filtered_df['tools_technologies'], required_skills_list)

                        original_count = len(filtered_df)
                        filtered_df = filtered_df[lang_match | tools_match]
                        print(f"    Applied Job Skills filter ({required_skills_list}): {original_count} -> {len(filtered_df)}")

                    # Filter by experience level (Checks if internships or projects exist if experience > 0)
                    if job.get('experience_level') and job['experience_level'] > 0:
                         intern_exists = 'internships' in filtered_df.columns
                         proj_exists = 'projects' in filtered_df.columns
                         has_intern = check_not_empty(filtered_df.get('internships')) if intern_exists else pd.Series([False] * len(filtered_df), index=filtered_df.index)
                         has_proj = check_not_empty(filtered_df.get('projects')) if proj_exists else pd.Series([False] * len(filtered_df), index=filtered_df.index)

                         if intern_exists or proj_exists:
                             original_count = len(filtered_df)
                             filtered_df = filtered_df[has_proj | has_intern]
                             print(f"    Applied Job Experience (>0 requires Internship/Project): {original_count} -> {len(filtered_df)}")
                         else: print("    Skipped Job Experience: Internships/Projects columns not found")
                else:
                    print(f"    Warning: Job ID {job_id_val} not found for filtering.")
                    flash(f"Warning: Job ID {job_id_val} not found, could not apply job-specific filters.", "warning")

            except ValueError:
                 print(f"  Invalid job_id format: {job_id_filter}")
            except Exception as e:
                print(f"  Error fetching/applying job details for filtering: {e}")
                flash(f"Warning: Could not apply filters for Job ID {job_id_filter}. Error: {e}", "warning")

    except Exception as e:
        print(f"Error during filtering process: {e}")
        import traceback
        traceback.print_exc()
        flash(f"An critical error occurred during filtering: {e}", "danger")
        return df # Return original df on major error

    print(f"Filter Students: Finished with {len(filtered_df)} students.")
    return filtered_df


def get_ai_insights(students_data, role):
    """Generates AI insights using Gemini for selected students and a role.

    Args:
        students_data: DataFrame with one row per selected student profile.
        role: Target job role the analysis should focus on.

    Returns:
        Markdown insight text from Gemini, or a human-readable status/error
        message when the service is unavailable or the call fails.
    """
    if not HAS_GEMINI:
        return "AI Insights disabled: Gemini API key not configured."
    if students_data.empty:
        return "No student data provided for analysis."

    print(f"Generating AI insights for {len(students_data)} students, role: {role}") # Debug

    # Format the student data for Gemini prompt (Simplified)
    students_prompt = f"Analyze the following {len(students_data)} students based *only* on the provided profile data for suitability regarding the **{role}** role:\n\n---\n"
    columns_to_include = [
        'full_name', 'department', 'cgpa', 'backlogs',
        'programming_languages', 'tools_technologies', 'soft_skills',
        'projects', 'internships', 'certifications', 'preferred_roles',
        'strengths', 'weaknesses'
    ]

    # Number students by their position in the selection, not by the DataFrame
    # index label: filtered frames keep their original (non-sequential) index,
    # which previously produced headers like "Student 42" for the 1st student.
    for position, (_, student) in enumerate(students_data.iterrows(), start=1):
        profile_str = f"**Student {position}: {student.get('full_name', 'N/A')}**\n"
        for col in columns_to_include:
            if col != 'full_name':
                value = student.get(col)
                display_value = "N/A"
                # Handle lists BEFORE pd.notna(): pd.notna on a list returns
                # an element-wise array, which is ambiguous in a bool context
                # and would raise ValueError.
                if isinstance(value, list):
                    display_value = ", ".join(map(str, value)) if value else "N/A"
                elif pd.notna(value):
                    if isinstance(value, float):
                        display_value = f"{value:.2f}"
                    elif isinstance(value, (int, np.integer)):
                        display_value = str(value)
                    elif isinstance(value, str) and value.strip():
                        display_value = value.strip()

                # Omit empty/unknown fields to keep the prompt compact.
                if display_value != "N/A":
                    profile_str += f"- {col.replace('_', ' ').title()}: {display_value}\n"
        students_prompt += profile_str + "---\n"

    students_prompt += f"""
    **Analysis Request for {role}:**

    1.  **Ranked List:** Provide a ranked list (Top 3-5 recommended) of these students for the role. Briefly justify each rank based *only* on the provided data points (e.g., relevant skills, experience, CGPA if applicable).
    2.  **Strengths & Potential Gaps:** For *each* student listed above, concisely identify 1-2 key strengths and 1-2 potential gaps or areas needing clarification *specifically* for the '{role}' role, based *only* on their profile.
    3.  **Overall Summary:** Briefly summarize which student(s) appear most promising overall for this *specific* role, considering the data provided. Mention any significant trade-offs.

    **Important Constraints:**
    *   Base your entire analysis *strictly* on the profile data given above.
    *   Do not invent or assume information not present.
    *   Focus on relevance to the specified '{role}'.
    *   Format the output clearly using Markdown (bolding, lists).
    """

    try:
        model = genai.GenerativeModel('gemini-1.5-flash') # Or gemini-pro
        # Safety settings can be adjusted if needed
        # safety_settings = [...]
        # response = model.generate_content(students_prompt, safety_settings=safety_settings)
        response = model.generate_content(students_prompt)

        # An empty parts list usually indicates the safety filters blocked output.
        if not response.parts:
             print("Warning: AI response might be blocked due to safety settings.")
             feedback = response.prompt_feedback if hasattr(response, 'prompt_feedback') else "No feedback available."
             print(f"Prompt Feedback: {feedback}")
             return f"AI analysis could not be completed (content potentially blocked by safety filters). Feedback: {feedback}"

        print("AI insights generated successfully.") # Debug
        return response.text
    except Exception as e:
        print(f"Error generating AI insights: {e}")
        import traceback
        traceback.print_exc()
        return f"Error generating AI insights: {str(e)}\n\nPlease check your Gemini API key and configuration."


# --- Chart Data Calculation Functions ---

def calculate_cgpa_distribution(df):
    """Bucket student CGPAs into fixed ranges for the distribution chart.

    Returns a dict with parallel 'labels' and 'data' lists; both are empty
    when the frame has no usable 'cgpa' values or an error occurs.
    """
    chart = {'labels': [], 'data': []}
    no_cgpa = df.empty or 'cgpa' not in df.columns or df['cgpa'].isnull().all()
    if no_cgpa:
        print("CGPA Calc: DataFrame empty or 'cgpa' column missing/all null.")
        return chart
    try:
        # Half-open bins [low, high); 10.1 upper edge keeps a perfect 10.0 in range.
        bin_edges = [0, 6, 7, 7.5, 8, 8.5, 9, 9.5, 10.1]
        bin_labels = ['< 6.0', '6.0-6.9', '7.0-7.4', '7.5-7.9', '8.0-8.4', '8.5-8.9', '9.0-9.4', '9.5-10.0']
        binned = pd.cut(df['cgpa'].dropna(), bins=bin_edges, labels=bin_labels,
                        right=False, include_lowest=True)
        # value_counts on a categorical keeps zero-count bins; sort back into bin order.
        bucket_counts = binned.value_counts().sort_index()
        chart = {'labels': bucket_counts.index.tolist(), 'data': bucket_counts.values.tolist()}
        print(f"CGPA Calc Results: Labels={chart['labels']}, Data={chart['data']}") # DEBUG
        return chart
    except Exception as e:
        print(f"Error calculating CGPA dist data: {e}")
        return chart

def calculate_department_distribution(df):
    """Count students per department for the distribution chart.

    Returns a dict with parallel 'labels' and 'data' lists; both are empty
    when no department data is available or an error occurs.
    """
    chart = {'labels': [], 'data': []}
    if df.empty or 'department' not in df.columns or df['department'].isnull().all():
        print("Dept Calc: DataFrame empty or 'department' column missing/all null.")
        return chart
    try:
        dept_counts = df['department'].value_counts()
        chart = {'labels': dept_counts.index.tolist(), 'data': dept_counts.values.tolist()}
        print(f"Dept Calc Results: Labels={chart['labels']}, Data={chart['data']}") # DEBUG
    except Exception as e:
        print(f"Error calculating Dept dist data: {e}")
    return chart

def calculate_skills_distribution(df, top_n=10):
    """Tally the top N programming languages across all students.

    Entries may be comma-separated strings or lists; a parenthesised
    qualifier such as "Python (advanced)" is stripped before counting.
    """
    chart = {'labels': [], 'data': []}
    if df.empty or 'programming_languages' not in df.columns:
        print("Skills Calc: DataFrame empty or 'programming_languages' column missing.")
        return chart
    try:
        def normalize(entry):
            # Accept a comma-separated string or a list; drop the "(...)"
            # qualifier and lowercase so variants count together.
            if isinstance(entry, str) and entry.strip():
                parts = entry.split(',')
            elif isinstance(entry, list):
                parts = [str(p) for p in entry if p and str(p).strip()]
            else:
                return []
            return [p.split('(')[0].strip().lower() for p in parts if p.strip()]

        all_skills = []
        for entry in df['programming_languages'].dropna():
            all_skills.extend(normalize(entry))

        if not all_skills:
            print("Skills Calc: No skills found after parsing.")
            return chart
        top_skills = pd.Series(all_skills).value_counts().nlargest(top_n)
        chart = {
            'labels': [name.capitalize() for name in top_skills.index.tolist()],
            'data': top_skills.values.tolist(),
        }
        print(f"Skills Calc Results: Labels={chart['labels']}, Data={chart['data']}") # DEBUG
        return chart
    except Exception as e:
        print(f"Error calculating Skills dist data: {e}")
        return chart

def calculate_roles_distribution(df, top_n=10):
    """Tally the top N preferred roles across all students.

    Entries may be comma-separated strings or lists; roles are lowercased
    for counting and title-cased for display.
    """
    chart = {'labels': [], 'data': []}
    if df.empty or 'preferred_roles' not in df.columns:
        print("Roles Calc: DataFrame empty or 'preferred_roles' column missing.")
        return chart
    try:
        collected = []
        for item in df['preferred_roles'].dropna():
            if isinstance(item, str) and item.strip():
                collected.extend(part.strip().lower() for part in item.split(',') if part.strip())
            elif isinstance(item, list):
                collected.extend(str(part).strip().lower() for part in item if part and str(part).strip())

        if not collected:
            print("Roles Calc: No roles found after parsing.")
            return chart
        top_roles = pd.Series(collected).value_counts().nlargest(top_n)
        chart = {
            'labels': [name.title() for name in top_roles.index.tolist()],
            'data': top_roles.values.tolist(),
        }
        print(f"Roles Calc Results: Labels={chart['labels']}, Data={chart['data']}") # DEBUG
        return chart
    except Exception as e:
        print(f"Error calculating Roles dist data: {e}")
        return chart

# --- Routes ---

@app.route('/')
def home():
    """Sends logged-in companies to the dashboard, everyone else to login."""
    target = 'dashboard' if 'company_email' in session else 'login'
    return redirect(url_for(target))


@app.route('/login', methods=['GET', 'POST'])
def login():
    """Authenticates a company against the Supabase 'companies' table.

    GET renders the login form; POST verifies the submitted credentials,
    populates the session on success, and redirects to the dashboard.
    """
    # No database connection means no login can succeed — fail fast with 503.
    if not supabase:
        return make_response(render_template('company_login.html', error="Database service unavailable. Please try again later."), 503)

    error = None
    if request.method == 'POST':
        email = request.form.get('email')
        password = request.form.get('password')

        if not email or not password:
             error = 'Email and password are required.'
             return render_template('company_login.html', error=error)

        try:
            print(f"--- Attempting login for: {email}")
            # maybe_single() yields at most one row; response.data is None when absent.
            response = supabase.table('companies').select('*').eq('email', email).maybe_single().execute()
            print(f"--- Supabase response status: {response.data is not None}")

            if response.data:
                company_data = response.data
                print(f"--- Company data found: {company_data.keys()}")
                stored_hash = company_data.get('password')
                print(f"--- Stored hash found: {stored_hash is not None}")

                if stored_hash and verify_password(stored_hash, password):
                    print("--- Password verified")
                    # Require a complete profile row before trusting it for the session.
                    required_keys = ['id', 'email', 'company_name']
                    if all(k in company_data for k in required_keys):
                        session['company_id'] = company_data['id']
                        session['company_email'] = company_data['email']
                        session['company_name'] = company_data['company_name']
                        session.permanent = True
                        print("--- Session set successfully")
                        flash(f"Welcome back, {session['company_name']}!", "success")
                        return redirect(url_for('dashboard'))
                    else:
                        print(f"--- Error: Missing required keys in company_data. Found: {company_data.keys()}")
                        error = "Login failed: Incomplete company profile data in database."
                else:
                    print("--- Password verification failed")
                    error = 'Invalid credentials. Please try again.'
            else:
                print(f"--- Company email '{email}' not found in database.")
                error = 'Company email not found.'
        except Exception as e:
            print(f"--- Login error exception: {e}")
            import traceback
            traceback.print_exc()
            error = 'An error occurred during login. Please contact support.'

    # Already-authenticated visitors (GET, or a failed POST while a session
    # exists) are sent straight to the dashboard instead of re-rendering the form.
    if 'company_email' in session:
        return redirect(url_for('dashboard'))
    return render_template('company_login.html', error=error)


@app.route('/logout')
def logout():
    """Clears the company session (including cached filter options) and returns to login."""
    # Dropping the cached filter options forces a rebuild on the next login.
    for key in ('company_id', 'company_email', 'company_name', 'filter_options_cache'):
        session.pop(key, None)
    flash("You have been logged out.", "info")
    return redirect(url_for('login'))


@app.route('/dashboard')
def dashboard():
    """Displays the main company dashboard.

    Renders the student table, filter dropdowns, initial chart datasets,
    and the company's job postings. Requires an authenticated session.
    """
    if 'company_email' not in session:
        flash("Please log in to access the dashboard.", "warning")
        return redirect(url_for('login'))
    if not supabase:
        flash("Database service unavailable.", "danger")
        return render_template('company_dashboard.html', company_name=session.get('company_name', 'Company'), students=[], filter_options={}, initial_chart_data='{}', jobs=[], error="Database connection failed")

    # Load student data
    df = load_student_data()

    # Prepare data for filters (distinct values per filterable column)
    filter_options = {
        'departments': get_unique_values(df, 'department'),
        'programming_languages': get_unique_values(df, 'programming_languages'),
        'tools': get_unique_values(df, 'tools_technologies'),
        'roles': get_unique_values(df, 'preferred_roles'),
        # Merge plain and detailed certification lists into one deduplicated set.
        'certifications': sorted(list(set(get_unique_values(df, 'certifications') + get_unique_values(df, 'detailed_certifications'))))
    }
    # --- Cache filter options in session ---
    # Reused by the NL-query route so it doesn't reload the whole dataset.
    session['filter_options_cache'] = filter_options
    # --- End Caching ---

    # --- Calculate Initial Chart Data ---
    initial_chart_data = {
        'cgpa': calculate_cgpa_distribution(df),
        'department': calculate_department_distribution(df),
        'skills': calculate_skills_distribution(df),
        'roles': calculate_roles_distribution(df)
    }
    print("\n--- Initial Chart Data (Backend - Dashboard Route) ---")
    try:
        print(json.dumps(initial_chart_data, indent=2))
    except Exception as json_e: print(f"Error dumping initial chart data: {json_e}")
    print("-----------------------------------------------------\n")

    # Fetch jobs posted by this company (newest first); non-fatal on failure.
    jobs_list = []
    try:
        response = supabase.table('jobs').select('id, title').eq('company_id', session['company_id']).order('posted_date', desc=True).execute()
        jobs_list = response.data if response.data else []
    except Exception as e:
        print(f"Error fetching jobs for dashboard: {e}")
        flash("Could not load job postings.", "warning")

    # Convert DataFrame NaNs to None for JSON serialization
    display_cols = ['full_name', 'email', 'department', 'cgpa', 'backlogs', 'programming_languages', 'preferred_roles', 'internships', 'projects', 'hackathons', 'publications']
    # Only select columns that actually exist in the dataframe
    existing_display_cols = [col for col in display_cols if col in df.columns]
    students_list = df[existing_display_cols].replace({np.nan: None}).to_dict('records') if not df.empty else []

    return render_template(
        'company_dashboard.html',
        initial_chart_data=initial_chart_data, # Pass dict directly
        company_name=session['company_name'],
        students=students_list,
        filter_options=filter_options, # Pass for initial rendering
        jobs=jobs_list
    )


@app.route('/filter_students', methods=['POST'])
def filter_students_route():
    """Handles AJAX request to filter students and provides updated chart data.

    Reads filter criteria from the POSTed form, applies them via
    filter_students(), and returns JSON with the matching student rows,
    recomputed chart datasets, and a result count.
    """
    if 'company_email' not in session:
        return jsonify({'error': 'Not authenticated'}), 401
    if not supabase:
         return jsonify({'error': 'Database service unavailable'}), 503

    print("\n--- Filter Students Route Called ---") # Debug
    df = load_student_data()
    if df.empty:
         print("Filter Students Route: No student data loaded.")
         return jsonify({'students': [], 'chart_data': {}, 'count': 0})

    filters = request.form.to_dict()

    # Explicitly process boolean flags from form data ('on' means True)
    filters['has_hackathons'] = filters.get('has_hackathons') == 'on'
    filters['has_experience'] = filters.get('has_experience') == 'on'
    filters['has_research'] = filters.get('has_research') == 'on'

    # Get lists correctly — to_dict() above keeps only one value per key,
    # so multi-select fields must be re-read with getlist().
    filters['skills'] = request.form.getlist('skills')
    filters['roles'] = request.form.getlist('roles')
    filters['certifications'] = request.form.getlist('certifications')

    # Apply filters
    filtered_df = filter_students(df, filters)

    # --- Calculate Chart Data for Filtered Results ---
    filtered_chart_data = {
        'cgpa': calculate_cgpa_distribution(filtered_df),
        'department': calculate_department_distribution(filtered_df),
        'skills': calculate_skills_distribution(filtered_df),
        'roles': calculate_roles_distribution(filtered_df)
    }
    print("\n--- Filtered Chart Data (Backend - Filter Route) ---")
    try: print(json.dumps(filtered_chart_data, indent=2))
    except Exception as json_e: print(f"Error dumping filtered chart data: {json_e}")
    print("----------------------------------------------------\n")

    # Prepare data for JSON response (NaN -> None so jsonify emits null)
    display_cols = ['full_name', 'email', 'department', 'cgpa', 'backlogs', 'programming_languages', 'preferred_roles', 'internships', 'projects', 'hackathons', 'publications']
    existing_display_cols = [col for col in display_cols if col in filtered_df.columns]
    students_result = filtered_df[existing_display_cols].replace({np.nan: None}).to_dict('records') if not filtered_df.empty else []

    print(f"Filter Students Route: Returning {len(students_result)} students.") # Debug
    return jsonify({
        'students': students_result,
        'chart_data': filtered_chart_data,
        'count': len(filtered_df)
    })


@app.route('/process_nl_query', methods=['POST'])
def process_nl_query():
    """Handles AJAX request to convert natural language query to filters.

    Sends the recruiter's free-text query to Gemini along with the known
    filter vocabulary, and returns the parsed JSON filter parameters.
    """
    if 'company_email' not in session:
        return jsonify({'error': 'Not authenticated'}), 401
    if not supabase:
        return jsonify({'error': 'Database service unavailable'}), 503
    if not HAS_GEMINI:
        return jsonify({'error': 'AI Query service not configured or unavailable.'}), 503

    nl_query = request.json.get('query')
    if not nl_query:
        return jsonify({'error': 'No query provided.'}), 400

    print(f"Processing NL Query: '{nl_query}'")

    # Retrieve cached filter options (populated by the dashboard route).
    cached_options = session.get('filter_options_cache')
    if not cached_options:
        # Fallback: Regenerate if not in session (should be rare)
        print("Warning: Filter options cache miss in NL Query route, regenerating.")
        df_temp = load_student_data()
        cached_options = {
            'departments': get_unique_values(df_temp, 'department'),
            'programming_languages': get_unique_values(df_temp, 'programming_languages'),
            'tools': get_unique_values(df_temp, 'tools_technologies'),
            'roles': get_unique_values(df_temp, 'preferred_roles'),
            'certifications': sorted(list(set(get_unique_values(df_temp, 'certifications') + get_unique_values(df_temp, 'detailed_certifications'))))
        }
        session['filter_options_cache'] = cached_options # Cache it now

    # Combine skills for context (languages + tools form one "skills" vocabulary)
    all_skill_examples = list(set(cached_options.get('programming_languages', []) + cached_options.get('tools', [])))

    # --- Construct Prompt for Gemini ---
    # The prompt enumerates the exact filter keys the frontend understands and
    # includes truncated example values so the model maps terms consistently.
    prompt = f"""
    Analyze the following natural language query from a recruiter and convert it into a JSON object containing filter parameters for a student database.

    **Available Filter Fields and Keys (Use these keys in the JSON output):**
    *   `min_cgpa`: Minimum CGPA (float, 0.0 to 10.0).
    *   `max_backlogs`: Maximum number of backlogs allowed (integer, >= 0).
    *   `skills`: List of required skills (strings). Match these against programming languages OR tools.
    *   `roles`: List of preferred job roles (strings).
    *   `certifications`: List of certifications (strings).
    *   `department`: Specific department name (string). Extract if mentioned clearly.
    *   `has_hackathons`: Boolean (true if user mentions hackathons or competitions).
    *   `has_experience`: Boolean (true if user mentions projects or internships).
    *   `has_research`: Boolean (true if user mentions research, publications, or patents).

    **Context - Available Values (Examples - Use for guidance, not exhaustive):**
    *   Departments: {json.dumps(cached_options.get('departments', [])[:10])}...
    *   Skills (Programming/Tools): {json.dumps(all_skill_examples[:15])}...
    *   Roles: {json.dumps(cached_options.get('roles', [])[:10])}...
    *   Certifications: {json.dumps(cached_options.get('certifications', [])[:10])}...

    **Recruiter's Query:**
    "{nl_query}"

    **Instructions:**
    1.  Parse the query to extract filtering criteria.
    2.  Map criteria to the correct filter keys listed above.
    3.  For list fields (`skills`, `roles`, `certifications`), output a JSON list of strings. Include the user's terms.
    4.  For boolean fields (`has_...`), output `true` only if explicitly mentioned. Otherwise, omit the key or set to `false`.
    5.  For `min_cgpa` and `max_backlogs`, extract the numeric value.
    6.  For `department`, extract the specific department name mentioned.
    7.  If a criterion isn't mentioned, omit its key from the JSON.
    8.  Output *only* the JSON object, without any explanations or markdown formatting.

    **Example Input Query:** "Find computer science students with CGPA over 7.5, no backlogs, and who know Python or Java. Experience with projects is a plus."
    **Example JSON Output:**
    {{
      "department": "Computer Science",
      "min_cgpa": 7.5,
      "max_backlogs": 0,
      "skills": ["Python", "Java"],
      "has_experience": true
    }}

    **Now, process the recruiter's query above and generate the JSON output.**
    """

    try:
        print("--- Sending NL prompt to Gemini ---")
        model = genai.GenerativeModel('gemini-1.5-flash') # Or gemini-pro
        response = model.generate_content(prompt)

        # Clean the response (remove markdown backticks, etc.) — the model
        # sometimes wraps its JSON in a ```json fenced block despite instructions.
        print(f"--- Gemini Raw Response Text ---\n{response.text}\n-----------------------------")
        cleaned_response_text = response.text.strip()
        if cleaned_response_text.startswith('```json'): cleaned_response_text = cleaned_response_text[7:]
        if cleaned_response_text.startswith('```'): cleaned_response_text = cleaned_response_text[3:]
        if cleaned_response_text.endswith('```'): cleaned_response_text = cleaned_response_text[:-3]
        cleaned_response_text = cleaned_response_text.strip()

        if not cleaned_response_text:
             raise ValueError("Gemini returned an empty response after cleaning.")

        filter_params = json.loads(cleaned_response_text)
        print(f"--- Parsed Filter Params ---\n{json.dumps(filter_params, indent=2)}\n--------------------------")

        if not isinstance(filter_params, dict):
            raise ValueError("Gemini response was not a valid JSON object.")

        # Optional: Further validation/type conversion can be added here if needed

        return jsonify({'filters': filter_params})

    except json.JSONDecodeError as e:
        print(f"Error decoding Gemini JSON response: {e}")
        print(f"Response text was: {cleaned_response_text}") # Log cleaned text
        return jsonify({'error': f'AI could not generate valid filter parameters. Please rephrase your query. (JSON Error: {e}). Raw response: {response.text}'}), 400
    except Exception as e:
        print(f"Error processing NL query with Gemini: {e}")
        import traceback
        traceback.print_exc()
        return jsonify({'error': f'An error occurred while processing the AI query: {e}'}), 500


@app.route('/ai_insights', methods=['POST'])
def ai_insights_route():
    """Handles AJAX request for AI insights.

    Looks up the selected students (by email) in the loaded dataset and
    returns Gemini-generated analysis text for the requested role.
    """
    if 'company_email' not in session:
        return jsonify({'error': 'Not authenticated'}), 401
    if not supabase:
         return jsonify({'error': 'Database service unavailable'}), 503
    if not HAS_GEMINI:
        return jsonify({'error': 'AI Insights service not configured or unavailable.'}), 503

    df = load_student_data()
    selected_emails = request.form.getlist('selected_students')
    role = request.form.get('role', 'General Role Analysis')

    print(f"AI Insights requested for emails: {selected_emails} and role: {role}")

    if not selected_emails:
        print("No students selected for AI analysis.")
        return jsonify({'error': 'No students selected for analysis.'}), 400

    # Guard: an empty dataset (or one without an 'email' column) would raise
    # a KeyError on df['email'] below; treat it the same as "not found".
    if df.empty or 'email' not in df.columns:
        print("AI Insights: student data unavailable or missing 'email' column.")
        return jsonify({'error': 'Selected student data could not be found.'}), 404

    selected_students_df = df[df['email'].isin(selected_emails)].copy()

    if selected_students_df.empty:
        print(f"Selected student emails not found in loaded data: {selected_emails}")
        return jsonify({'error': 'Selected student data could not be found.'}), 404

    insights = get_ai_insights(selected_students_df, role)
    return jsonify({'insights': insights})


@app.route('/export_filtered', methods=['POST'])
def export_filtered():
    """Exports the currently filtered student list (sent from client) to CSV.

    The client posts the already-filtered rows as JSON in 'filtered_students';
    the server only selects/orders columns and streams back a CSV attachment.
    """
    if 'company_email' not in session:
        return jsonify({"error": "Authentication required for export"}), 401

    student_data_json = request.form.get('filtered_students')
    if not student_data_json:
        return jsonify({"error": "No student data provided"}), 400

    try:
        student_data = json.loads(student_data_json)
        if not student_data:
             return jsonify({"error": "No students to export"}), 400

        df_to_export = pd.DataFrame(student_data)
        # Preferred column order; columns the client didn't send are skipped.
        export_columns = [
            'full_name', 'email', 'department', 'cgpa', 'backlogs',
            'programming_languages', 'tools_technologies', 'soft_skills',
            'projects', 'internships', 'certifications', 'detailed_certifications',
            'hackathons', 'publications', 'patents',
            'preferred_roles', 'preferred_location', 'strengths', 'weaknesses',
            'typing_speed', 'linkedin_profile', 'github_profile', 'portfolio_url'
        ]
        available_export_cols = [col for col in export_columns if col in df_to_export.columns]
        df_to_export = df_to_export[available_export_cols]

    except json.JSONDecodeError:
        print("Error decoding JSON for export")
        return jsonify({"error": "Invalid data format for export"}), 400
    except Exception as e:
        print(f"Error preparing export data: {e}")
        return jsonify({"error": f"Server error during export preparation: {e}"}), 500

    try:
        mem_file = io.BytesIO()
        # BUG FIX: 'utf-8-sig' already emits the UTF-8 BOM Excel needs, so the
        # previous explicit mem_file.write(u'\ufeff'...) produced a *doubled*
        # BOM (a stray "ï»¿" in the first cell in some viewers). Let pandas
        # write the single BOM.
        df_to_export.to_csv(mem_file, index=False, encoding='utf-8-sig')
        mem_file.seek(0)

        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        response = make_response(send_file(
            mem_file,
            mimetype='text/csv',
            download_name=f'filtered_students_{timestamp}.csv',
            as_attachment=True
        ))
        # Set headers explicitly for clients/proxies that ignore send_file's defaults.
        response.headers['Content-Disposition'] = f"attachment; filename=filtered_students_{timestamp}.csv"
        response.headers["Content-type"] = "text/csv; charset=utf-8"
        print(f"Exporting {len(df_to_export)} students to CSV.")
        return response

    except Exception as e:
        print(f"Error sending export file: {e}")
        return jsonify({"error": f"Server error during file generation: {e}"}), 500


@app.route('/jobs', methods=['GET', 'POST'])
def jobs():
    """Displays job postings and handles new job creation.

    GET lists the company's jobs; POST validates the submitted job form,
    inserts the job into Supabase, and triggers an email notification.
    """
    if 'company_email' not in session:
        flash("Please log in to manage job postings.", "warning")
        return redirect(url_for('login'))
    if not supabase:
        flash("Database service unavailable.", "danger")
        return render_template('jobs.html', jobs=[], skills=[], company_name=session.get('company_name', 'Company'),
                               error="Database connection failed")

    company_id = session.get('company_id')
    if not company_id:
        flash("Company session error. Please log in again.", "danger")
        return redirect(url_for('logout'))

    # Load student data once for skills list generation
    # (the job form's skill picker offers every skill seen in student profiles).
    df_students = load_student_data()
    prog_langs = get_unique_values(df_students, 'programming_languages')
    tools_tech = get_unique_values(df_students, 'tools_technologies')
    available_skills = sorted(list(set(prog_langs + tools_tech)))

    if request.method == 'POST':
        try:
            required_skills_list = request.form.getlist('required_skills')
            min_cgpa_str = request.form.get('min_cgpa')
            experience_str = request.form.get('experience_level', '0')
            title = request.form.get('title')
            description = request.form.get('description')
            location = request.form.get('location')

            # Basic validation — first failing check wins.
            error_msg = None
            if not title:
                error_msg = "Job Title is required."
            elif not description:
                error_msg = "Job Description is required."
            elif not required_skills_list:
                error_msg = "At least one required skill must be selected."
            elif not location:
                error_msg = "Location is required."

            if error_msg:
                # Re-render the page (with current jobs) instead of redirecting,
                # so the validation message is shown alongside the form.
                flash(error_msg, "warning")
                response = supabase.table('jobs').select('*').eq('company_id', company_id).order('posted_date',
                                                                                                 desc=True).execute()
                jobs_list = response.data if response.data else []
                return render_template('jobs.html', jobs=jobs_list, skills=available_skills,
                                       company_name=session['company_name'])

            new_job_data = {
                'company_id': company_id,
                'title': title,
                'description': description,
                'required_skills': required_skills_list,  # Store as array
                'min_cgpa': float(min_cgpa_str) if min_cgpa_str else None,
                # Non-numeric input falls back to 0 (entry-level).
                'experience_level': int(experience_str) if experience_str.isdigit() else 0,
                'location': location
            }
            response = supabase.table('jobs').insert(new_job_data).execute()

            if response.data:
                flash("New job posted successfully!", "success")

                # Send email notification about the new job
                # (failure is non-fatal — the job is already saved).
                email_sent = send_job_email(new_job_data)
                if email_sent:
                    flash("Job details sent by email successfully.", "info")
                else:
                    flash("Job posted but email notification failed to send.", "warning")
            else:
                print(f"Job insert response structure: {response}")
                flash("Failed to post job. Please check details and try again.", "danger")

        except Exception as e:
            print(f"Error posting job: {e}")
            import traceback;
            traceback.print_exc()
            flash(f"An error occurred while posting the job: {e}", "danger")

        return redirect(url_for('jobs'))  # Redirect after POST

    # GET request handling — list this company's jobs, newest first.
    jobs_list = []
    try:
        response_jobs = supabase.table('jobs').select('*').eq('company_id', company_id).order('posted_date',
                                                                                              desc=True).execute()
        jobs_list = response_jobs.data if response_jobs.data else []
    except Exception as e:
        print(f"Error fetching data for jobs page: {e}")
        flash("Could not load job data.", "warning")

    return render_template('jobs.html', jobs=jobs_list, skills=available_skills, company_name=session['company_name'])


@app.route('/job/<int:job_id>')
def job_details(job_id):
    """Show one job posting together with the students who match it.

    Renders job_details.html with the job record, a trimmed candidate list
    for the HTML table, and the full candidate records serialized to JSON
    for the client-side modal. Redirects back to the jobs listing when the
    job does not exist or belongs to a different company.
    """
    # Require an authenticated company session before showing anything.
    if 'company_email' not in session:
        flash("Please log in to view job details.", "warning")
        return redirect(url_for('login'))
    if not supabase:
        flash("Database service unavailable.", "danger")
        return render_template('job_details.html', job=None, candidates_full=[], candidates_table=[], company_name=session.get('company_name', 'Company'), error="Database connection failed")

    company_id = session.get('company_id')
    job_record = None
    table_rows = []
    full_records = []

    try:
        # Look up the requested job row (maybe_single -> at most one record).
        job_record = supabase.table('jobs').select('*').eq('id', job_id).maybe_single().execute().data

        if not job_record:
            flash("Job not found.", "danger")
            return redirect(url_for('jobs'))

        # A company may only inspect its own postings.
        if job_record.get('company_id') != company_id:
            flash("You are not authorized to view details for this job.", "danger")
            print(f"Unauthorized access attempt to job {job_id} by company {company_id}")
            return redirect(url_for('jobs'))

        # Match students against this job's requirements.
        students_df = load_student_data()
        if students_df.empty:
            flash("Could not load student data to find eligible candidates.", "warning")
        else:
            matches = filter_students(students_df.copy(), {'job_id': job_id})

            if not matches.empty:
                # Trimmed column set for the summary table; only keep columns
                # that actually exist in the student frame.
                wanted = ['full_name', 'email', 'department', 'cgpa', 'programming_languages', 'projects', 'internships']
                present = [col for col in wanted if col in matches.columns]
                table_rows = matches[present].replace({np.nan: None}).to_dict('records')
                # Full rows feed the detail modal via a JSON payload.
                full_records = matches.replace({np.nan: None}).to_dict('records')

    except Exception as e:
        print(f"Error fetching job details or candidates for job {job_id}: {e}")
        import traceback; traceback.print_exc()
        flash("An error occurred while loading job details or finding candidates.", "danger")
        # Without a job record there is nothing sensible to render.
        if not job_record:
            return redirect(url_for('jobs'))

    return render_template(
        'job_details.html',
        job=job_record,
        candidates_full=json.dumps(full_records),  # JSON string for JS modal
        candidates_table=table_rows,               # rows for the HTML table
        company_name=session.get('company_name', 'Company')
    )


# --- Error Handlers (Use base.html now) ---
@app.errorhandler(404)
def page_not_found(e):
    """Serve the custom not-found page with a 404 status code."""
    body = render_template('404.html')
    return body, 404

@app.errorhandler(500)
def internal_server_error(e):
    """Log the failure with a full stack trace, then serve the 500 page."""
    import traceback
    print(f"Server Error: {e}")
    traceback.print_exc()  # stack trace to the console for debugging
    return render_template('500.html', error=e), 500

# --- Context Processor for Year ---
@app.context_processor
def inject_now():
    """Expose the current UTC time to every template as ``now``.

    Returns a dict merged into the Jinja context (e.g. for the footer year).
    Uses a timezone-aware datetime: ``datetime.utcnow()`` is deprecated since
    Python 3.12 and yields a naive timestamp; ``now.year`` etc. behave the
    same for template consumers.
    """
    from datetime import timezone  # local import keeps this fix self-contained
    return {'now': datetime.now(timezone.utc)}

# --- Run Application ---
if __name__ == '__main__':
    # Warn loudly (but still boot) when the Supabase client never came up,
    # so templates and static routes remain reachable for debugging.
    if not supabase:
        for warning_line in (
            "\nWARNING: Supabase client failed to initialize.",
            "Application will run but database operations will likely fail.",
            "Check your .env file and Supabase project status.\n",
        ):
            print(warning_line)

    listen_port = int(os.environ.get("PORT", 5001))
    # NOTE(review): debug is enabled unless FLASK_DEBUG=false is set —
    # confirm this default is overridden in production deployments.
    run_debug = os.environ.get("FLASK_DEBUG", "true").lower() == "true"
    print(f"Starting Flask app on port {listen_port} with debug mode: {run_debug}")
    app.run(debug=run_debug, port=listen_port, host='0.0.0.0')