# app.py (Company Portal)
import hashlib
import io
import json
import os
import re
import smtplib
from datetime import datetime
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText

import numpy as np
import pandas as pd
from dotenv import load_dotenv
from flask import (Flask, flash, jsonify, make_response, redirect,
                   render_template, request, send_file, session, url_for)
import google.generativeai as genai
from supabase import Client, create_client

load_dotenv()

# Current year, used in the job-posting email footer.
year = datetime.now().year

app = Flask(__name__)
# Falls back to a random per-process key: sessions will not survive a
# restart unless COMPANY_PORTAL_SECRET_KEY is set in the environment.
app.secret_key = os.getenv("COMPANY_PORTAL_SECRET_KEY", os.urandom(24))

# --- Supabase Client Initialization ---
supabase_url = os.getenv("SUPABASE_URL")
supabase_key = os.getenv("SUPABASE_KEY")
if not supabase_url or not supabase_key:
    print("CRITICAL ERROR: SUPABASE_URL and SUPABASE_KEY environment variables are required.")
    supabase: Client = None
else:
    try:
        supabase: Client = create_client(supabase_url, supabase_key)
        print("Supabase client initialized successfully for Company Portal.")
    except Exception as e:
        print(f"CRITICAL ERROR: Error initializing Supabase client for Company Portal: {e}")
        supabase = None

# --- Gemini API Configuration ---
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
HAS_GEMINI = False
if GEMINI_API_KEY:
    try:
        genai.configure(api_key=GEMINI_API_KEY)
        HAS_GEMINI = True
        print("Gemini API Configured.")
    except Exception as e:
        print(f"Warning: Error configuring Gemini API: {e}. AI Insights & NL Query may fail.")
else:
    print("Warning: GEMINI_API_KEY not found in .env file. AI Insights & NL Query will not work.")


# --- Helper Functions ---
def verify_password(stored_hash, password):
    """Verifies a password against a stored SHA256 hash.

    NOTE(review): unsalted SHA256 is weak for password storage; consider
    migrating to a salted KDF (bcrypt/werkzeug.security). Left unchanged
    here because stored hashes in the database use this scheme.
    """
    input_hash = hashlib.sha256(password.encode()).hexdigest()
    return input_hash == stored_hash


def load_student_data():
    """Loads all student data from the Supabase 'students' table.

    Returns a DataFrame (possibly empty). Numeric columns are coerced and
    'backlogs' is normalized to int with NaN treated as 0.
    """
    if not supabase:
        flash("Database connection error. Cannot load student data.", "danger")
        return pd.DataFrame()
    try:
        # BUG FIX: this previously queried the 'companies' table; student
        # records live in 'students' (see docstring, the numeric student
        # columns handled below, and the log messages).
        response = supabase.from_('students').select('*').execute()
        if response.data:
            df = pd.DataFrame(response.data)
            numeric_cols = ['cgpa', 'backlogs', 'typing_speed']
            for col in numeric_cols:
                if col in df.columns:
                    # Coerce errors to NaN, then handle NaN later if needed
                    df[col] = pd.to_numeric(df[col], errors='coerce')
            if 'backlogs' in df.columns:
                # Fill NaN with 0 before converting to int
                df['backlogs'] = df['backlogs'].fillna(0).astype(int)
            print(f"Loaded {len(df)} student records.")  # Debug
            return df
        else:
            print("No student data found in Supabase.")  # Debug
            return pd.DataFrame()
    except Exception as e:
        print(f"Error loading student data from Supabase: {e}")
        flash(f"Error loading student data: {e}", "danger")
        return pd.DataFrame()


def send_job_email(job_data):
    """Sends job posting details via email.

    SMTP settings come from SMTP_SERVER/SMTP_PORT/SMTP_USERNAME/SMTP_PASSWORD
    environment variables. Returns True on success, False on any failure.
    """
    try:
        # Email configuration from environment variables
        smtp_server = os.getenv("SMTP_SERVER")
        smtp_port = int(os.getenv("SMTP_PORT", 0))
        smtp_username = os.getenv("SMTP_USERNAME")
        smtp_password = os.getenv("SMTP_PASSWORD")
        if not all([smtp_server, smtp_port, smtp_username, smtp_password]):
            raise RuntimeError("SMTP credentials are missing or invalid in .env")

        # Recipient is now configurable via env; defaults to the original
        # hard-coded address for backward compatibility.
        recipient_email = os.getenv("JOB_EMAIL_RECIPIENT", "pranit.chilbule221@vit.edu")

        # Create message
        msg = MIMEMultipart()
        msg['From'] = smtp_username
        msg['To'] = recipient_email
        msg['Subject'] = f"New Job Posting: {job_data['title']}"

        # Email body (plain text)
        body = f"""
New Job Posting

Hello Team,

Title: {job_data['title']}
Location: {job_data['location']}
Required Skills: {', '.join(job_data['required_skills'])}
Minimum CGPA: {job_data['min_cgpa'] if job_data['min_cgpa'] else 'Not specified'}
Experience Level: {job_data['experience_level']} year(s)

Description:

{job_data['description']}

This is an automated message from the Company Portal.

© {year} Your Company Name. All rights reserved.
"""
        # BUG FIX: the body is plain text but was attached as 'html',
        # which collapses all line breaks in most mail clients.
        msg.attach(MIMEText(body, 'plain'))

        # Connect to SMTP server and send
        with smtplib.SMTP(smtp_server, smtp_port) as server:
            server.ehlo()        # Identify ourselves to SMTP
            server.starttls()    # Upgrade to secure TLS
            server.ehlo()        # Re-identify after STARTTLS
            server.login(smtp_username, smtp_password)
            server.send_message(msg)
        print(f"Email sent successfully to {recipient_email}")
        return True
    except Exception as e:
        print(f"Error sending email: {e}")
        import traceback
        traceback.print_exc()
        return False


def get_unique_values(df, column):
    """Gets unique, sorted, non-empty values from a DataFrame column,
    handling comma-separated strings and lists.

    Proficiency suffixes such as " (Beginner)" are stripped from each value.
    """
    values = set()
    if column in df.columns and not df[column].isnull().all():
        for item in df[column].dropna():
            item_str = str(item).strip()
            if item_str:
                # Handle actual lists (from DB array type) or comma-separated strings
                if isinstance(item, list):
                    values.update(str(x).split('(')[0].strip() for x in item if x and str(x).strip())
                elif ',' in item_str:
                    values.update(x.split('(')[0].strip() for x in item_str.split(',') if x.strip())
                else:
                    values.add(item_str.split('(')[0].strip())
    return sorted(values)


def filter_students(df, filters):
    """Filters the student DataFrame based on various criteria including job requirements.

    Supported filter keys: min_cgpa, max_backlogs, department, skills, roles,
    certifications, has_hackathons, has_experience, has_research, job_id.
    Returns the filtered DataFrame; on a critical error, returns the
    original DataFrame unchanged.
    """
    if df.empty:
        print("Filter Students: Input DataFrame is empty.")
        return df
    filtered_df = df.copy()
    print(f"Filter Students: Starting with {len(filtered_df)} students. Filters: {filters}")
    try:
        # CGPA filter
        if filters.get('min_cgpa') is not None and filters['min_cgpa'] != '':
            try:
                min_cgpa_val = float(filters['min_cgpa'])
                if 'cgpa' in filtered_df.columns:
                    original_count = len(filtered_df)
                    filtered_df = filtered_df[filtered_df['cgpa'].fillna(0) >= min_cgpa_val]
                    print(f" Applied CGPA >= {min_cgpa_val}: {original_count} -> {len(filtered_df)}")
            except (ValueError, TypeError):
                print(f" Warning: Invalid CGPA filter value '{filters['min_cgpa']}'")

        # Backlogs filter
        if filters.get('max_backlogs') is not None and filters['max_backlogs'] != '':
            try:
                max_backlogs_val = float(filters['max_backlogs'])
                if 'backlogs' in filtered_df.columns:
                    original_count = len(filtered_df)
                    filtered_df = filtered_df[filtered_df['backlogs'] <= max_backlogs_val]
                    print(f" Applied Backlogs <= {max_backlogs_val}: {original_count} -> {len(filtered_df)}")
            except (ValueError, TypeError):
                print(f" Warning: Invalid Backlogs filter value '{filters['max_backlogs']}'")

        # Department filter (added for NL Query possibility)
        if filters.get('department') and 'department' in filtered_df.columns:
            dept_filter = str(filters['department']).strip()
            if dept_filter:
                original_count = len(filtered_df)
                # Case-insensitive substring comparison
                filtered_df = filtered_df[filtered_df['department'].str.contains(dept_filter, case=False, na=False)]
                print(f" Applied Department contains '{dept_filter}': {original_count} -> {len(filtered_df)}")

        def text_contains_any(series, patterns):
            """Boolean mask: rows whose (list or comma-separated) values match any
            pattern, case-insensitively, with proficiency suffixes stripped."""
            if not patterns or series.isnull().all():
                return pd.Series([False] * len(series), index=series.index)
            patterns_set = {str(p).lower().strip() for p in patterns if p and str(p).strip()}
            if not patterns_set:
                return pd.Series([False] * len(series), index=series.index)

            def check_item(item):
                # BUG FIX: test list-ness before pd.isna() — pd.isna() on a
                # list returns an array, which raises in a boolean context.
                if isinstance(item, list):
                    elements = [str(el).lower().split('(')[0].strip() for el in item if el]
                elif pd.isna(item):
                    return False
                elif isinstance(item, str):
                    elements = [el.lower().split('(')[0].strip() for el in item.split(',') if el.strip()]
                else:
                    elements = [str(item).lower().split('(')[0].strip()]
                return any(el in patterns_set for el in elements if el)

            return series.apply(check_item)

        # Skills filter (checks programming_languages OR tools_technologies)
        if filters.get('skills'):
            skill_patterns = filters['skills']
            lang_match = pd.Series([False] * len(filtered_df), index=filtered_df.index)
            tools_match = pd.Series([False] * len(filtered_df), index=filtered_df.index)
            if 'programming_languages' in filtered_df.columns:
                lang_match = text_contains_any(filtered_df['programming_languages'], skill_patterns)
            if 'tools_technologies' in filtered_df.columns:
                tools_match = text_contains_any(filtered_df['tools_technologies'], skill_patterns)
            original_count = len(filtered_df)
            filtered_df = filtered_df[lang_match | tools_match]
            print(f" Applied Skills filter ({skill_patterns}): {original_count} -> {len(filtered_df)}")

        # Preferred roles filter
        if filters.get('roles'):
            role_patterns = filters['roles']
            if 'preferred_roles' in filtered_df.columns:
                original_count = len(filtered_df)
                filtered_df = filtered_df[text_contains_any(filtered_df['preferred_roles'], role_patterns)]
                print(f" Applied Roles filter ({role_patterns}): {original_count} -> {len(filtered_df)}")

        # Certifications filter (checks certifications OR detailed_certifications)
        if filters.get('certifications'):
            cert_patterns = filters['certifications']
            cert_match = pd.Series([False] * len(filtered_df), index=filtered_df.index)
            detail_cert_match = pd.Series([False] * len(filtered_df), index=filtered_df.index)
            if 'certifications' in filtered_df.columns:
                cert_match = text_contains_any(filtered_df['certifications'], cert_patterns)
            if 'detailed_certifications' in filtered_df.columns:
                detail_cert_match = text_contains_any(filtered_df['detailed_certifications'], cert_patterns)
            original_count = len(filtered_df)
            filtered_df = filtered_df[cert_match | detail_cert_match]
            print(f" Applied Certifications filter ({cert_patterns}): {original_count} -> {len(filtered_df)}")

        def check_not_empty(series):
            """Boolean mask: value is present and non-empty (non-blank string,
            non-empty list, or any other non-null value)."""
            if series is None or series.empty:
                return pd.Series([False] * len(filtered_df), index=filtered_df.index)

            def _present(x):
                # BUG FIX: handle lists before pd.notna() — pd.notna() on a
                # list returns an array, which raises in a boolean context.
                if isinstance(x, list):
                    return len(x) > 0
                if isinstance(x, str):
                    return x.strip() != ''
                return bool(pd.notna(x))

            return series.apply(_present)

        # Boolean flags (already coerced to real booleans in filter_students_route)
        if filters.get('has_hackathons') is True:
            if 'hackathons' in filtered_df.columns:
                original_count = len(filtered_df)
                filtered_df = filtered_df[check_not_empty(filtered_df['hackathons'])]
                print(f" Applied Has Hackathons: {original_count} -> {len(filtered_df)}")
            else:
                print(" Skipped Has Hackathons: Column not found")

        if filters.get('has_experience') is True:
            proj_exists = 'projects' in filtered_df.columns
            intern_exists = 'internships' in filtered_df.columns
            has_proj = check_not_empty(filtered_df.get('projects')) if proj_exists else pd.Series([False] * len(filtered_df), index=filtered_df.index)
            has_intern = check_not_empty(filtered_df.get('internships')) if intern_exists else pd.Series([False] * len(filtered_df), index=filtered_df.index)
            original_count = len(filtered_df)
            filtered_df = filtered_df[has_proj | has_intern]
            print(f" Applied Has Experience (Proj/Intern): {original_count} -> {len(filtered_df)}")

        if filters.get('has_research') is True:
            pub_exists = 'publications' in filtered_df.columns
            patent_exists = 'patents' in filtered_df.columns
            has_pub = check_not_empty(filtered_df.get('publications')) if pub_exists else pd.Series([False] * len(filtered_df), index=filtered_df.index)
            has_patent = check_not_empty(filtered_df.get('patents')) if patent_exists else pd.Series([False] * len(filtered_df), index=filtered_df.index)
            original_count = len(filtered_df)
            filtered_df = filtered_df[has_pub | has_patent]
            print(f" Applied Has Research (Pub/Patent): {original_count} -> {len(filtered_df)}")

        # --- Job role-based filtering (applied if job_id is present) ---
        job_id_filter = filters.get('job_id')
        if job_id_filter and job_id_filter != '' and supabase:
            try:
                job_id_val = int(job_id_filter)
                print(f" Applying filters for Job ID: {job_id_val}")
                response = supabase.table('jobs').select('min_cgpa, required_skills, experience_level').eq('id', job_id_val).maybe_single().execute()
                if response.data:
                    job = response.data
                    print(f" Job Requirements: {job}")
                    # Filter by minimum CGPA requirement
                    if job.get('min_cgpa') is not None and 'cgpa' in filtered_df.columns:
                        original_count = len(filtered_df)
                        filtered_df = filtered_df[filtered_df['cgpa'].fillna(0) >= float(job['min_cgpa'])]
                        print(f" Applied Job CGPA >= {job['min_cgpa']}: {original_count} -> {len(filtered_df)}")
                    # Filter by required skills (ANY logic)
                    if job.get('required_skills'):
                        required_skills_list = job['required_skills']
                        lang_match = pd.Series([False] * len(filtered_df), index=filtered_df.index)
                        tools_match = pd.Series([False] * len(filtered_df), index=filtered_df.index)
                        if 'programming_languages' in filtered_df.columns:
                            lang_match = text_contains_any(filtered_df['programming_languages'], required_skills_list)
                        if 'tools_technologies' in filtered_df.columns:
                            tools_match = text_contains_any(filtered_df['tools_technologies'], required_skills_list)
                        original_count = len(filtered_df)
                        filtered_df = filtered_df[lang_match | tools_match]
                        print(f" Applied Job Skills filter ({required_skills_list}): {original_count} -> {len(filtered_df)}")
                    # Filter by experience level (>0 requires internships or projects)
                    if job.get('experience_level') and job['experience_level'] > 0:
                        intern_exists = 'internships' in filtered_df.columns
                        proj_exists = 'projects' in filtered_df.columns
                        has_intern = check_not_empty(filtered_df.get('internships')) if intern_exists else pd.Series([False] * len(filtered_df), index=filtered_df.index)
                        has_proj = check_not_empty(filtered_df.get('projects')) if proj_exists else pd.Series([False] * len(filtered_df), index=filtered_df.index)
                        if intern_exists or proj_exists:
                            original_count = len(filtered_df)
                            filtered_df = filtered_df[has_proj | has_intern]
                            print(f" Applied Job Experience (>0 requires Internship/Project): {original_count} -> {len(filtered_df)}")
                        else:
                            print(" Skipped Job Experience: Internships/Projects columns not found")
                else:
                    print(f" Warning: Job ID {job_id_val} not found for filtering.")
                    flash(f"Warning: Job ID {job_id_val} not found, could not apply job-specific filters.", "warning")
            except ValueError:
                print(f" Invalid job_id format: {job_id_filter}")
            except Exception as e:
                print(f" Error fetching/applying job details for filtering: {e}")
                flash(f"Warning: Could not apply filters for Job ID {job_id_filter}. Error: {e}", "warning")
    except Exception as e:
        print(f"Error during filtering process: {e}")
        import traceback
        traceback.print_exc()
        flash(f"A critical error occurred during filtering: {e}", "danger")
        return df  # Return original df on major error
    print(f"Filter Students: Finished with {len(filtered_df)} students.")
    return filtered_df


def get_ai_insights(students_data, role):
    """Generates AI insights using Gemini for selected students and a role.

    Returns the model's Markdown text, or an explanatory error string if the
    API is unconfigured, the input is empty, or the call fails.
    """
    if not HAS_GEMINI:
        return "AI Insights disabled: Gemini API key not configured."
    if students_data.empty:
        return "No student data provided for analysis."
    print(f"Generating AI insights for {len(students_data)} students, role: {role}")  # Debug

    # Format the student data for the Gemini prompt (simplified)
    students_prompt = f"Analyze the following {len(students_data)} students based *only* on the provided profile data for suitability regarding the **{role}** role:\n\n---\n"
    columns_to_include = [
        'full_name', 'department', 'cgpa', 'backlogs', 'programming_languages',
        'tools_technologies', 'soft_skills', 'projects', 'internships',
        'certifications', 'preferred_roles', 'strengths', 'weaknesses'
    ]
    # BUG FIX: number students by 1-based position, not the DataFrame index
    # label, which is non-sequential after filtering.
    for position, (_, student) in enumerate(students_data.iterrows(), start=1):
        profile_str = f"**Student {position}: {student.get('full_name', 'N/A')}**\n"
        for col in columns_to_include:
            if col != 'full_name':
                value = student.get(col)
                display_value = "N/A"
                # BUG FIX: handle lists before pd.notna() — pd.notna() on a
                # list returns an array, which raises in a boolean context.
                if isinstance(value, list):
                    display_value = ", ".join(map(str, value)) if value else "N/A"
                elif pd.notna(value):
                    if isinstance(value, float):
                        display_value = f"{value:.2f}"
                    elif isinstance(value, (int, np.integer)):
                        display_value = str(value)
                    elif isinstance(value, str) and value.strip():
                        display_value = value.strip()
                if display_value != "N/A":
                    profile_str += f"- {col.replace('_', ' ').title()}: {display_value}\n"
        students_prompt += profile_str + "---\n"

    students_prompt += f"""
**Analysis Request for {role}:**
1. **Ranked List:** Provide a ranked list (Top 3-5 recommended) of these students for the role. Briefly justify each rank based *only* on the provided data points (e.g., relevant skills, experience, CGPA if applicable).
2. **Strengths & Potential Gaps:** For *each* student listed above, concisely identify 1-2 key strengths and 1-2 potential gaps or areas needing clarification *specifically* for the '{role}' role, based *only* on their profile.
3. **Overall Summary:** Briefly summarize which student(s) appear most promising overall for this *specific* role, considering the data provided. Mention any significant trade-offs.

**Important Constraints:**
* Base your entire analysis *strictly* on the profile data given above.
* Do not invent or assume information not present.
* Focus on relevance to the specified '{role}'.
* Format the output clearly using Markdown (bolding, lists).
"""
    try:
        model = genai.GenerativeModel('gemini-1.5-flash')  # Or gemini-pro
        # Safety settings can be adjusted if needed:
        # response = model.generate_content(students_prompt, safety_settings=[...])
        response = model.generate_content(students_prompt)
        if not response.parts:
            print("Warning: AI response might be blocked due to safety settings.")
            feedback = response.prompt_feedback if hasattr(response, 'prompt_feedback') else "No feedback available."
            print(f"Prompt Feedback: {feedback}")
            return f"AI analysis could not be completed (content potentially blocked by safety filters). Feedback: {feedback}"
        print("AI insights generated successfully.")  # Debug
        return response.text
    except Exception as e:
        print(f"Error generating AI insights: {e}")
        import traceback
        traceback.print_exc()
        return f"Error generating AI insights: {str(e)}\n\nPlease check your Gemini API key and configuration."
# --- Chart Data Calculation Functions ---
def calculate_cgpa_distribution(df):
    """Calculates data for CGPA distribution chart.

    Returns {'labels': [...], 'data': [...]}; empty lists when the column
    is absent or all-null.
    """
    result = {'labels': [], 'data': []}
    if df.empty or 'cgpa' not in df.columns or df['cgpa'].isnull().all():
        print("CGPA Calc: DataFrame empty or 'cgpa' column missing/all null.")
        return result
    try:
        # Left-closed bins; 10.1 upper edge so a perfect 10.0 lands in the last bucket.
        bins = [0, 6, 7, 7.5, 8, 8.5, 9, 9.5, 10.1]
        labels = ['< 6.0', '6.0-6.9', '7.0-7.4', '7.5-7.9', '8.0-8.4', '8.5-8.9', '9.0-9.4', '9.5-10.0']
        cgpa_binned = pd.cut(df['cgpa'].dropna(), bins=bins, labels=labels, right=False, include_lowest=True)
        counts = cgpa_binned.value_counts().sort_index()
        result = {'labels': counts.index.tolist(), 'data': counts.values.tolist()}
        print(f"CGPA Calc Results: Labels={result['labels']}, Data={result['data']}")  # DEBUG
        return result
    except Exception as e:
        print(f"Error calculating CGPA dist data: {e}")
        return result


def calculate_department_distribution(df):
    """Calculates data for department distribution chart."""
    result = {'labels': [], 'data': []}
    if df.empty or 'department' not in df.columns or df['department'].isnull().all():
        print("Dept Calc: DataFrame empty or 'department' column missing/all null.")
        return result
    try:
        counts = df['department'].value_counts()
        result = {'labels': counts.index.tolist(), 'data': counts.values.tolist()}
        print(f"Dept Calc Results: Labels={result['labels']}, Data={result['data']}")  # DEBUG
        return result
    except Exception as e:
        print(f"Error calculating Dept dist data: {e}")
        return result


def calculate_skills_distribution(df, top_n=10):
    """Calculates data for top N programming skills distribution.

    Skills may be stored as comma-separated strings or lists; proficiency
    suffixes like " (Beginner)" are stripped before counting.
    """
    result = {'labels': [], 'data': []}
    if df.empty or 'programming_languages' not in df.columns:
        print("Skills Calc: DataFrame empty or 'programming_languages' column missing.")
        return result
    try:
        all_skills = []
        for skills_item in df['programming_languages'].dropna():
            skills = []
            if isinstance(skills_item, str) and skills_item.strip():
                skills = [s.split('(')[0].strip().lower() for s in skills_item.split(',') if s.strip()]
            elif isinstance(skills_item, list):
                skills = [str(s).split('(')[0].strip().lower() for s in skills_item if s and str(s).strip()]
            all_skills.extend(skills)
        if not all_skills:
            print("Skills Calc: No skills found after parsing.")
            return result
        skill_counts = pd.Series(all_skills).value_counts().nlargest(top_n)
        labels = [s.capitalize() for s in skill_counts.index.tolist()]
        result = {'labels': labels, 'data': skill_counts.values.tolist()}
        print(f"Skills Calc Results: Labels={result['labels']}, Data={result['data']}")  # DEBUG
        return result
    except Exception as e:
        print(f"Error calculating Skills dist data: {e}")
        return result


def calculate_roles_distribution(df, top_n=10):
    """Calculates data for top N preferred roles distribution."""
    result = {'labels': [], 'data': []}
    if df.empty or 'preferred_roles' not in df.columns:
        print("Roles Calc: DataFrame empty or 'preferred_roles' column missing.")
        return result
    try:
        all_roles = []
        for roles_item in df['preferred_roles'].dropna():
            roles = []
            if isinstance(roles_item, str) and roles_item.strip():
                roles = [r.strip().lower() for r in roles_item.split(',') if r.strip()]
            elif isinstance(roles_item, list):
                roles = [str(r).strip().lower() for r in roles_item if r and str(r).strip()]
            all_roles.extend(roles)
        if not all_roles:
            print("Roles Calc: No roles found after parsing.")
            return result
        role_counts = pd.Series(all_roles).value_counts().nlargest(top_n)
        labels = [r.title() for r in role_counts.index.tolist()]
        result = {'labels': labels, 'data': role_counts.values.tolist()}
        print(f"Roles Calc Results: Labels={result['labels']}, Data={result['data']}")  # DEBUG
        return result
    except Exception as e:
        print(f"Error calculating Roles dist data: {e}")
        return result


# --- Routes ---
@app.route('/')
def home():
    """Redirects to dashboard if logged in, otherwise to login page."""
    if 'company_email' in session:
        return redirect(url_for('dashboard'))
    return redirect(url_for('login'))


@app.route('/login', methods=['GET', 'POST'])
def login():
    """Company login: verifies credentials against the 'companies' table."""
    if not supabase:
        return make_response(render_template('company_login.html', error="Database service unavailable. Please try again later."), 503)
    error = None
    if request.method == 'POST':
        email = request.form.get('email')
        password = request.form.get('password')
        if not email or not password:
            error = 'Email and password are required.'
            return render_template('company_login.html', error=error)
        try:
            print(f"--- Attempting login for: {email}")
            response = supabase.table('companies').select('*').eq('email', email).maybe_single().execute()
            print(f"--- Supabase response status: {response.data is not None}")
            if response.data:
                company_data = response.data
                print(f"--- Company data found: {company_data.keys()}")
                stored_hash = company_data.get('password')
                print(f"--- Stored hash found: {stored_hash is not None}")
                if stored_hash and verify_password(stored_hash, password):
                    print("--- Password verified")
                    required_keys = ['id', 'email', 'company_name']
                    if all(k in company_data for k in required_keys):
                        session['company_id'] = company_data['id']
                        session['company_email'] = company_data['email']
                        session['company_name'] = company_data['company_name']
                        session.permanent = True
                        print("--- Session set successfully")
                        flash(f"Welcome back, {session['company_name']}!", "success")
                        return redirect(url_for('dashboard'))
                    else:
                        print(f"--- Error: Missing required keys in company_data. Found: {company_data.keys()}")
                        error = "Login failed: Incomplete company profile data in database."
                else:
                    print("--- Password verification failed")
                    error = 'Invalid credentials. Please try again.'
            else:
                print(f"--- Company email '{email}' not found in database.")
                error = 'Company email not found.'
        except Exception as e:
            print(f"--- Login error exception: {e}")
            import traceback
            traceback.print_exc()
            error = 'An error occurred during login. Please contact support.'
    # GET request (or fall-through) while already logged in: go to dashboard.
    if 'company_email' in session:
        return redirect(url_for('dashboard'))
    return render_template('company_login.html', error=error)


@app.route('/logout')
def logout():
    """Logs the company out and clears session state."""
    session.pop('company_id', None)
    session.pop('company_email', None)
    session.pop('company_name', None)
    session.pop('filter_options_cache', None)  # Clear cached options on logout
    flash("You have been logged out.", "info")
    return redirect(url_for('login'))


@app.route('/dashboard')
def dashboard():
    """Displays the main company dashboard."""
    if 'company_email' not in session:
        flash("Please log in to access the dashboard.", "warning")
        return redirect(url_for('login'))
    if not supabase:
        flash("Database service unavailable.", "danger")
        # FIX: pass a dict (previously the string '{}') so the template sees
        # a consistent type for initial_chart_data in both paths; also guard
        # company_name with session.get.
        return render_template('company_dashboard.html',
                               company_name=session.get('company_name', 'Company'),
                               students=[], filter_options={},
                               initial_chart_data={}, jobs=[],
                               error="Database connection failed")

    # Load student data
    df = load_student_data()

    # Prepare data for filters
    filter_options = {
        'departments': get_unique_values(df, 'department'),
        'programming_languages': get_unique_values(df, 'programming_languages'),
        'tools': get_unique_values(df, 'tools_technologies'),
        'roles': get_unique_values(df, 'preferred_roles'),
        'certifications': sorted(list(set(get_unique_values(df, 'certifications') + get_unique_values(df, 'detailed_certifications'))))
    }
    # --- Cache filter options in session (used by the NL Query route) ---
    session['filter_options_cache'] = filter_options

    # --- Calculate Initial Chart Data ---
    initial_chart_data = {
        'cgpa': calculate_cgpa_distribution(df),
        'department': calculate_department_distribution(df),
        'skills': calculate_skills_distribution(df),
        'roles': calculate_roles_distribution(df)
    }
    print("\n--- Initial Chart Data (Backend - Dashboard Route) ---")
    try:
        print(json.dumps(initial_chart_data, indent=2))
    except Exception as json_e:
        print(f"Error dumping initial chart data: {json_e}")
    print("-----------------------------------------------------\n")

    # Fetch jobs posted by this company
    jobs_list = []
    try:
        response = supabase.table('jobs').select('id, title').eq('company_id', session['company_id']).order('posted_date', desc=True).execute()
        jobs_list = response.data if response.data else []
    except Exception as e:
        print(f"Error fetching jobs for dashboard: {e}")
        flash("Could not load job postings.", "warning")

    # Convert DataFrame NaNs to None for JSON serialization
    display_cols = ['full_name', 'email', 'department', 'cgpa', 'backlogs',
                    'programming_languages', 'preferred_roles', 'internships',
                    'projects', 'hackathons', 'publications']
    # Only select columns that actually exist in the dataframe
    existing_display_cols = [col for col in display_cols if col in df.columns]
    students_list = df[existing_display_cols].replace({np.nan: None}).to_dict('records') if not df.empty else []

    return render_template(
        'company_dashboard.html',
        initial_chart_data=initial_chart_data,  # Pass dict directly
        company_name=session['company_name'],
        students=students_list,
        filter_options=filter_options,  # Pass for initial rendering
        jobs=jobs_list
    )


@app.route('/filter_students', methods=['POST'])
def filter_students_route():
    """Handles AJAX request to filter students and provides updated chart data."""
    if 'company_email' not in session:
        return jsonify({'error': 'Not authenticated'}), 401
    if not supabase:
        return jsonify({'error': 'Database service unavailable'}), 503
    print("\n--- Filter Students Route Called ---")  # Debug
    df = load_student_data()
    if df.empty:
        print("Filter Students Route: No student data loaded.")
        return jsonify({'students': [], 'chart_data': {}, 'count': 0})

    filters = request.form.to_dict()
    # Explicitly process boolean flags from form data ('on' means True)
    filters['has_hackathons'] = filters.get('has_hackathons') == 'on'
    filters['has_experience'] = filters.get('has_experience') == 'on'
    filters['has_research'] = filters.get('has_research') == 'on'
    # Multi-valued fields must be read with getlist, not to_dict
    filters['skills'] = request.form.getlist('skills')
    filters['roles'] = request.form.getlist('roles')
    filters['certifications'] = request.form.getlist('certifications')

    # Apply filters
    filtered_df = filter_students(df, filters)

    # --- Calculate Chart Data for Filtered Results ---
    filtered_chart_data = {
        'cgpa': calculate_cgpa_distribution(filtered_df),
        'department': calculate_department_distribution(filtered_df),
        'skills': calculate_skills_distribution(filtered_df),
        'roles': calculate_roles_distribution(filtered_df)
    }
    print("\n--- Filtered Chart Data (Backend - Filter Route) ---")
    try:
        print(json.dumps(filtered_chart_data, indent=2))
    except Exception as json_e:
        print(f"Error dumping filtered chart data: {json_e}")
    print("----------------------------------------------------\n")

    # Prepare data for JSON response
    display_cols = ['full_name', 'email', 'department', 'cgpa', 'backlogs',
                    'programming_languages', 'preferred_roles', 'internships',
                    'projects', 'hackathons', 'publications']
    existing_display_cols = [col for col in display_cols if col in filtered_df.columns]
    students_result = filtered_df[existing_display_cols].replace({np.nan: None}).to_dict('records') if not filtered_df.empty else []
    print(f"Filter Students Route: Returning {len(students_result)} students.")  # Debug
    return jsonify({
        'students': students_result,
        'chart_data': filtered_chart_data,
        'count': len(filtered_df)
    })


@app.route('/process_nl_query', methods=['POST'])
def process_nl_query():
    """Handles AJAX request to convert natural language query to filters."""
    if 'company_email' not in session:
        return jsonify({'error': 'Not authenticated'}), 401
    if not supabase:
        return jsonify({'error': 'Database service unavailable'}), 503
    if not HAS_GEMINI:
        return jsonify({'error': 'AI Query service not configured or unavailable.'}), 503
    nl_query = request.json.get('query')
    if not nl_query:
        return jsonify({'error': 'No query provided.'}), 400
    print(f"Processing NL Query: '{nl_query}'")
    # Retrieve cached filter
options cached_options = session.get('filter_options_cache') if not cached_options: # Fallback: Regenerate if not in session (should be rare) print("Warning: Filter options cache miss in NL Query route, regenerating.") df_temp = load_student_data() cached_options = { 'departments': get_unique_values(df_temp, 'department'), 'programming_languages': get_unique_values(df_temp, 'programming_languages'), 'tools': get_unique_values(df_temp, 'tools_technologies'), 'roles': get_unique_values(df_temp, 'preferred_roles'), 'certifications': sorted(list(set(get_unique_values(df_temp, 'certifications') + get_unique_values(df_temp, 'detailed_certifications')))) } session['filter_options_cache'] = cached_options # Cache it now # Combine skills for context all_skill_examples = list(set(cached_options.get('programming_languages', []) + cached_options.get('tools', []))) # --- Construct Prompt for Gemini --- prompt = f""" Analyze the following natural language query from a recruiter and convert it into a JSON object containing filter parameters for a student database. **Available Filter Fields and Keys (Use these keys in the JSON output):** * `min_cgpa`: Minimum CGPA (float, 0.0 to 10.0). * `max_backlogs`: Maximum number of backlogs allowed (integer, >= 0). * `skills`: List of required skills (strings). Match these against programming languages OR tools. * `roles`: List of preferred job roles (strings). * `certifications`: List of certifications (strings). * `department`: Specific department name (string). Extract if mentioned clearly. * `has_hackathons`: Boolean (true if user mentions hackathons or competitions). * `has_experience`: Boolean (true if user mentions projects or internships). * `has_research`: Boolean (true if user mentions research, publications, or patents). **Context - Available Values (Examples - Use for guidance, not exhaustive):** * Departments: {json.dumps(cached_options.get('departments', [])[:10])}... 
* Skills (Programming/Tools): {json.dumps(all_skill_examples[:15])}... * Roles: {json.dumps(cached_options.get('roles', [])[:10])}... * Certifications: {json.dumps(cached_options.get('certifications', [])[:10])}... **Recruiter's Query:** "{nl_query}" **Instructions:** 1. Parse the query to extract filtering criteria. 2. Map criteria to the correct filter keys listed above. 3. For list fields (`skills`, `roles`, `certifications`), output a JSON list of strings. Include the user's terms. 4. For boolean fields (`has_...`), output `true` only if explicitly mentioned. Otherwise, omit the key or set to `false`. 5. For `min_cgpa` and `max_backlogs`, extract the numeric value. 6. For `department`, extract the specific department name mentioned. 7. If a criterion isn't mentioned, omit its key from the JSON. 8. Output *only* the JSON object, without any explanations or markdown formatting. **Example Input Query:** "Find computer science students with CGPA over 7.5, no backlogs, and who know Python or Java. Experience with projects is a plus." **Example JSON Output:** {{ "department": "Computer Science", "min_cgpa": 7.5, "max_backlogs": 0, "skills": ["Python", "Java"], "has_experience": true }} **Now, process the recruiter's query above and generate the JSON output.** """ try: print("--- Sending NL prompt to Gemini ---") model = genai.GenerativeModel('gemini-1.5-flash') # Or gemini-pro response = model.generate_content(prompt) # Clean the response (remove markdown backticks, etc.) 
print(f"--- Gemini Raw Response Text ---\n{response.text}\n-----------------------------") cleaned_response_text = response.text.strip() if cleaned_response_text.startswith('```json'): cleaned_response_text = cleaned_response_text[7:] if cleaned_response_text.startswith('```'): cleaned_response_text = cleaned_response_text[3:] if cleaned_response_text.endswith('```'): cleaned_response_text = cleaned_response_text[:-3] cleaned_response_text = cleaned_response_text.strip() if not cleaned_response_text: raise ValueError("Gemini returned an empty response after cleaning.") filter_params = json.loads(cleaned_response_text) print(f"--- Parsed Filter Params ---\n{json.dumps(filter_params, indent=2)}\n--------------------------") if not isinstance(filter_params, dict): raise ValueError("Gemini response was not a valid JSON object.") # Optional: Further validation/type conversion can be added here if needed return jsonify({'filters': filter_params}) except json.JSONDecodeError as e: print(f"Error decoding Gemini JSON response: {e}") print(f"Response text was: {cleaned_response_text}") # Log cleaned text return jsonify({'error': f'AI could not generate valid filter parameters. Please rephrase your query. (JSON Error: {e}). 
Raw response: {response.text}'}), 400 except Exception as e: print(f"Error processing NL query with Gemini: {e}") import traceback traceback.print_exc() return jsonify({'error': f'An error occurred while processing the AI query: {e}'}), 500 @app.route('/ai_insights', methods=['POST']) def ai_insights_route(): """Handles AJAX request for AI insights.""" if 'company_email' not in session: return jsonify({'error': 'Not authenticated'}), 401 if not supabase: return jsonify({'error': 'Database service unavailable'}), 503 if not HAS_GEMINI: return jsonify({'error': 'AI Insights service not configured or unavailable.'}), 503 df = load_student_data() selected_emails = request.form.getlist('selected_students') role = request.form.get('role', 'General Role Analysis') print(f"AI Insights requested for emails: {selected_emails} and role: {role}") if not selected_emails: print("No students selected for AI analysis.") return jsonify({'error': 'No students selected for analysis.'}), 400 selected_students_df = df[df['email'].isin(selected_emails)].copy() if selected_students_df.empty: print(f"Selected student emails not found in loaded data: {selected_emails}") return jsonify({'error': 'Selected student data could not be found.'}), 404 insights = get_ai_insights(selected_students_df, role) return jsonify({'insights': insights}) @app.route('/export_filtered', methods=['POST']) def export_filtered(): """Exports the currently filtered student list (sent from client) to CSV.""" if 'company_email' not in session: return jsonify({"error": "Authentication required for export"}), 401 student_data_json = request.form.get('filtered_students') if not student_data_json: return jsonify({"error": "No student data provided"}), 400 try: student_data = json.loads(student_data_json) if not student_data: return jsonify({"error": "No students to export"}), 400 df_to_export = pd.DataFrame(student_data) export_columns = [ 'full_name', 'email', 'department', 'cgpa', 'backlogs', 'programming_languages', 
'tools_technologies', 'soft_skills', 'projects', 'internships', 'certifications', 'detailed_certifications', 'hackathons', 'publications', 'patents', 'preferred_roles', 'preferred_location', 'strengths', 'weaknesses', 'typing_speed', 'linkedin_profile', 'github_profile', 'portfolio_url' ] available_export_cols = [col for col in export_columns if col in df_to_export.columns] df_to_export = df_to_export[available_export_cols] except json.JSONDecodeError: print("Error decoding JSON for export") return jsonify({"error": "Invalid data format for export"}), 400 except Exception as e: print(f"Error preparing export data: {e}") return jsonify({"error": f"Server error during export preparation: {e}"}), 500 try: mem_file = io.BytesIO() mem_file.write(u'\ufeff'.encode('utf8')) # BOM for Excel df_to_export.to_csv(mem_file, index=False, encoding='utf-8-sig') mem_file.seek(0) timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") response = make_response(send_file( mem_file, mimetype='text/csv', download_name=f'filtered_students_{timestamp}.csv', as_attachment=True )) response.headers['Content-Disposition'] = f"attachment; filename=filtered_students_{timestamp}.csv" response.headers["Content-type"] = "text/csv; charset=utf-8" print(f"Exporting {len(df_to_export)} students to CSV.") return response except Exception as e: print(f"Error sending export file: {e}") return jsonify({"error": f"Server error during file generation: {e}"}), 500 @app.route('/jobs', methods=['GET', 'POST']) def jobs(): """Displays job postings and handles new job creation.""" if 'company_email' not in session: flash("Please log in to manage job postings.", "warning") return redirect(url_for('login')) if not supabase: flash("Database service unavailable.", "danger") return render_template('jobs.html', jobs=[], skills=[], company_name=session.get('company_name', 'Company'), error="Database connection failed") company_id = session.get('company_id') if not company_id: flash("Company session error. 
Please log in again.", "danger") return redirect(url_for('logout')) # Load student data once for skills list generation df_students = load_student_data() prog_langs = get_unique_values(df_students, 'programming_languages') tools_tech = get_unique_values(df_students, 'tools_technologies') available_skills = sorted(list(set(prog_langs + tools_tech))) if request.method == 'POST': try: required_skills_list = request.form.getlist('required_skills') min_cgpa_str = request.form.get('min_cgpa') experience_str = request.form.get('experience_level', '0') title = request.form.get('title') description = request.form.get('description') location = request.form.get('location') # Basic validation error_msg = None if not title: error_msg = "Job Title is required." elif not description: error_msg = "Job Description is required." elif not required_skills_list: error_msg = "At least one required skill must be selected." elif not location: error_msg = "Location is required." if error_msg: flash(error_msg, "warning") response = supabase.table('jobs').select('*').eq('company_id', company_id).order('posted_date', desc=True).execute() jobs_list = response.data if response.data else [] return render_template('jobs.html', jobs=jobs_list, skills=available_skills, company_name=session['company_name']) new_job_data = { 'company_id': company_id, 'title': title, 'description': description, 'required_skills': required_skills_list, # Store as array 'min_cgpa': float(min_cgpa_str) if min_cgpa_str else None, 'experience_level': int(experience_str) if experience_str.isdigit() else 0, 'location': location } response = supabase.table('jobs').insert(new_job_data).execute() if response.data: flash("New job posted successfully!", "success") # Send email notification about the new job email_sent = send_job_email(new_job_data) if email_sent: flash("Job details sent by email successfully.", "info") else: flash("Job posted but email notification failed to send.", "warning") else: print(f"Job insert response 
structure: {response}") flash("Failed to post job. Please check details and try again.", "danger") except Exception as e: print(f"Error posting job: {e}") import traceback; traceback.print_exc() flash(f"An error occurred while posting the job: {e}", "danger") return redirect(url_for('jobs')) # Redirect after POST # GET request handling jobs_list = [] try: response_jobs = supabase.table('jobs').select('*').eq('company_id', company_id).order('posted_date', desc=True).execute() jobs_list = response_jobs.data if response_jobs.data else [] except Exception as e: print(f"Error fetching data for jobs page: {e}") flash("Could not load job data.", "warning") return render_template('jobs.html', jobs=jobs_list, skills=available_skills, company_name=session['company_name']) @app.route('/job/') def job_details(job_id): """Displays details for a specific job and eligible candidates.""" if 'company_email' not in session: flash("Please log in to view job details.", "warning") return redirect(url_for('login')) if not supabase: flash("Database service unavailable.", "danger") return render_template('job_details.html', job=None, candidates_full=[], candidates_table=[], company_name=session.get('company_name', 'Company'), error="Database connection failed") job_data = None eligible_candidates_list = [] candidates_full_list = [] company_id = session.get('company_id') try: # Fetch the specific job details response_job = supabase.table('jobs').select('*').eq('id', job_id).maybe_single().execute() job_data = response_job.data if not job_data: flash("Job not found.", "danger") return redirect(url_for('jobs')) if job_data.get('company_id') != company_id: flash("You are not authorized to view details for this job.", "danger") print(f"Unauthorized access attempt to job {job_id} by company {company_id}") return redirect(url_for('jobs')) # Find eligible candidates df_students = load_student_data() if not df_students.empty: filters = {'job_id': job_id} eligible_df = 
filter_students(df_students.copy(), filters) # Prepare list for the table view (limited columns) display_cols_table = ['full_name', 'email', 'department', 'cgpa', 'programming_languages', 'projects', 'internships'] existing_display_cols_table = [col for col in display_cols_table if col in eligible_df.columns] eligible_candidates_list = eligible_df[existing_display_cols_table].replace({np.nan: None}).to_dict('records') if not eligible_df.empty else [] # Prepare list with full data for the modal candidates_full_list = eligible_df.replace({np.nan: None}).to_dict('records') if not eligible_df.empty else [] else: flash("Could not load student data to find eligible candidates.", "warning") except Exception as e: print(f"Error fetching job details or candidates for job {job_id}: {e}") import traceback; traceback.print_exc() flash("An error occurred while loading job details or finding candidates.", "danger") if not job_data: return redirect(url_for('jobs')) return render_template( 'job_details.html', job=job_data, candidates_full=json.dumps(candidates_full_list), # Pass full data as JSON string for JS candidates_table=eligible_candidates_list, # Data for the HTML table company_name=session.get('company_name', 'Company') ) # --- Error Handlers (Use base.html now) --- @app.errorhandler(404) def page_not_found(e): return render_template('404.html'), 404 @app.errorhandler(500) def internal_server_error(e): print(f"Server Error: {e}") import traceback traceback.print_exc() return render_template('500.html', error=e), 500 # --- Context Processor for Year --- @app.context_processor def inject_now(): return {'now': datetime.utcnow()} # --- Run Application --- if __name__ == '__main__': if not supabase: print("\nWARNING: Supabase client failed to initialize.") print("Application will run but database operations will likely fail.") print("Check your .env file and Supabase project status.\n") port = int(os.environ.get("PORT", 5001)) debug_mode = os.environ.get("FLASK_DEBUG", 
"true").lower() == "true" print(f"Starting Flask app on port {port} with debug mode: {debug_mode}") app.run(debug=debug_mode, port=port, host='0.0.0.0')