import pandas as pd import streamlit as st import joblib import time from datetime import datetime from sklearn.pipeline import Pipeline from sklearn.compose import ColumnTransformer from sklearn.preprocessing import StandardScaler, OneHotEncoder from PIL import Image, ImageFile ImageFile.LOAD_TRUNCATED_IMAGES = True # Disables the check # ============================================= # SETUP & CONFIGURATION # ============================================= def set_page_config(): st.set_page_config( page_title="WageWise", page_icon="💼", layout="wide", initial_sidebar_state="expanded" ) # Inject custom CSS with dark teal color scheme st.markdown(""" """, unsafe_allow_html=True) # ============================================= # DATA & MODEL LOADING # ============================================= @st.cache_data def load_data(): df = pd.read_csv('cleaned_job_salaries.csv') if 'Posting Date' in df.columns: df['Posting Day'] = pd.to_datetime(df['Posting Date']).dt.day df['Posting Month'] = pd.to_datetime(df['Posting Date']).dt.month return df @st.cache_resource def load_model(): model = joblib.load('best_decision_tree_model2.pkl') preprocessor = ColumnTransformer( transformers=[ ('num', StandardScaler(), ['# Of Positions', 'min_experience', 'license_required','bar_admission','driver_license_required', 'Posting Day','Posting Month']), ('cat', OneHotEncoder(), ['Agency', 'Posting Type', 'Business Title', 'Title Classification', 'Job Category', 'Full-Time/Part-Time indicator', 'Career Level', 'Salary Frequency', 'Level Description', 'required_degree', 'has_communication_skills']) ] ) pipeline = Pipeline(steps=[('preprocessor', preprocessor), ('regressor', model)]) df = load_data() pipeline.fit(df[['Agency', 'Posting Type', '# Of Positions', 'Business Title', 'Title Classification', 'Job Category', 'Full-Time/Part-Time indicator', 'Career Level', 'Salary Frequency', 'Level Description', 'required_degree', 'min_experience', 'license_required', 'bar_admission', 'driver_license_required', 'Posting Day', 'Posting Month', 'has_communication_skills']], df[['Salary']]) return pipeline # ============================================= # COMPONENTS # ============================================= def prediction_card(title, value, job_title, career_level, icon="💵"): st.markdown(f"""
Monthly: ${value/12:,.2f}
{job_title} | {career_level}
{label}
=0 else "#ef4444"}; font-size: 14px; margin: 4px 0 0;">{"+" if change >=0 else ""}{change if change is not None else ""}
' if change is not None else ''}AI-powered salary predictions with market intelligence
Business Title
', unsafe_allow_html=True) BusinessTitle = st.selectbox('Business Title', df['Business Title'].unique(), label_visibility="collapsed") Agency = df[df['Business Title'] == BusinessTitle]['Agency'].mode().iloc[0] # Posting Type with visible label st.markdown('Posting Type
', unsafe_allow_html=True) PostingType = st.selectbox('Posting Type', df['Posting Type'].unique(), label_visibility="collapsed") # Experience with visible label st.markdown('Experience (years)
', unsafe_allow_html=True) MinExperience = st.number_input('Experience (years)', 0, 50, 3, label_visibility="collapsed") # Title Classification with visible label st.markdown('Title Classification
', unsafe_allow_html=True) TitleClassification = st.selectbox('Title Classification', df['Title Classification'].unique(), label_visibility="collapsed") JobCategory = df[df['Business Title'] == BusinessTitle]['Job Category'].mode().iloc[0] # Employment Type with visible label st.markdown('Employment Type
', unsafe_allow_html=True) FullOrPartTime = st.selectbox('Employment Type', df['Full-Time/Part-Time indicator'].unique(), label_visibility="collapsed") # Career Level with visible label st.markdown('Career Level
', unsafe_allow_html=True) CareerLevel = st.selectbox('Career Level', df['Career Level'].unique(), label_visibility="collapsed") # Required Degree with visible label st.markdown('Required Degree
', unsafe_allow_html=True) RequiredDegree = st.selectbox('Required Degree', df['required_degree'].unique(), label_visibility="collapsed") st.markdown('', unsafe_allow_html=True) st.markdown('License Required
', unsafe_allow_html=True) LicenseRequired_option = st.selectbox('License Required', list(options.keys()), label_visibility="collapsed") LicenseRequired = options[LicenseRequired_option] # Bar Admission with visible label st.markdown('Bar Admission
', unsafe_allow_html=True) BarAdmission_option = st.selectbox('Bar Admission', list(options.keys()), label_visibility="collapsed") BarAdmission = options[BarAdmission_option] # Driver License with visible label st.markdown('Driver License
', unsafe_allow_html=True) DriverLicenseRequired_option = st.selectbox('Driver License', list(options.keys()), label_visibility="collapsed") DriverLicenseRequired = options[DriverLicenseRequired_option] communication_options = {'Yes': True, 'No': False} # Communication Skills with visible label st.markdown('Communication Skills
', unsafe_allow_html=True) hasCommunicationSkills = communication_options[st.selectbox('Communication Skills', list(communication_options.keys()), label_visibility="collapsed")] career_to_level_description = { 'Intern / Student Role': ['Intern/Trainee'], 'Entry-Level Professional': ['Junior-Level', 'Entry Specialist A', 'Entry Specialist B'], 'Experienced Professional': ['Mid-Level', 'Specialist Level A', 'Specialist Level B', 'Advanced Tech Level'], 'Mid-Level Manager': ['Manager Level 1', 'Manager Level 2', 'Manager Level 3', 'Manager Level 4', 'Manager Level 5'], 'Executive / Senior Leadership': ['Executive Manager 1', 'Executive Manager 2', 'Executive Manager 3', 'Lead-Level', 'Mayoral Appointee'] } LevelDescription = career_to_level_description.get(CareerLevel, [None])[0] df_forecast = pd.read_excel('forecast.xlsx') if BusinessTitle in df_forecast["Business Titles"].values: current_date = datetime.now() date = f'{current_date.month:02d}.2025' NumberOfPositions = df_forecast.loc[df_forecast["Business Titles"] == BusinessTitle, date].values[0] else: df = pd.read_csv('cleaned_job_salaries.csv') NumberOfPositions = df.loc[df["Business Title"] == BusinessTitle]['# Of Positions'].mean() if st.button('Predict Salary', use_container_width=True): st.session_state.predict_clicked = True # Main content if 'predict_clicked' in st.session_state and st.session_state.predict_clicked: pipeline = load_model() current_date = datetime.today().date() input_data = pd.DataFrame({ 'Agency': [Agency], 'Posting Type': [PostingType], '# Of Positions': [NumberOfPositions], 'Business Title': [BusinessTitle], 'Title Classification': [TitleClassification], 'Job Category': [JobCategory], 'Full-Time/Part-Time indicator': [FullOrPartTime], 'Career Level': [CareerLevel], 'Salary Frequency': ["Annual"], 'Level Description': [LevelDescription], 'required_degree': [RequiredDegree], 'min_experience': [MinExperience], 'license_required': [LicenseRequired], 'bar_admission': [BarAdmission], 'driver_license_required': [DriverLicenseRequired], 'Posting Day': [current_date.day], 'Posting Month': [current_date.month], 'has_communication_skills': [hasCommunicationSkills], }) with st.spinner('Analyzing job details with our AI model...'): time.sleep(1.5) predicted_salary = pipeline.predict(input_data)[0] # Results section st.markdown("## Prediction Results") # Check if NumberOfPositions came from forecast and is not zero show_positions = (BusinessTitle in df_forecast["Business Titles"].values) and (NumberOfPositions != 0) if show_positions: prediction_card( "Annual Salary", predicted_salary, f"{BusinessTitle} | {NumberOfPositions:.0f} positions available", CareerLevel, "💵" ) else: prediction_card( "Annual Salary", predicted_salary, BusinessTitle, CareerLevel, "💵" ) # Job details st.markdown("### Job Summary") col1, col2, col3 = st.columns(3) with col1: company_metric("Position Type", FullOrPartTime) with col2: company_metric("Experience Required", f"{MinExperience} years") with col3: company_metric("Education Level", RequiredDegree) st.markdown(""" """, unsafe_allow_html=True) if __name__ == '__main__': main()