# Career-guidance Streamlit app.
#
# Plan:
#   * give the path to the dataset
#   * load pkl (random forest, decision tree)
#   * use Streamlit to create the front page
#   * use a mapping to create a function that converts the categorical input
#     from the user to numerical values
#   * the user enters the fields; the function converts the fields into
#     numbers and that input is then given to the model
#   * present the result

import os
import pickle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import streamlit as st
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier

# NOTE(review): Streamlit re-executes this script on every interaction, so the
# dataset is downloaded and both models are retrained each time. Consider
# caching this section once the models are actually used for prediction.

# The "/blob/" URL of a Hugging Face repo is an HTML viewer page; "/resolve/"
# serves the raw file, which is what read_csv needs.
df = pd.read_csv(
    'https://huggingface.co/spaces/ArchiMathur/career_guidance/resolve/main/mldata.csv'
)

lr = LabelEncoder()

# NOTE(review): obj_col was never defined in the original script. It is taken
# here to be the non-numeric feature columns -- confirm. The target column is
# left untouched (scikit-learn classifiers accept string labels).
obj_col = [
    col
    for col in df.columns
    if col != 'Suggested Job Role'
    and not pd.api.types.is_numeric_dtype(df[col])
]
for i in obj_col:
    # fit() returns the encoder itself; fit_transform() returns the codes.
    # The single encoder is refit per column, so after the loop it only
    # remembers the classes of the last column.
    df[i] = lr.fit_transform(df[i])

x = df.drop('Suggested Job Role', axis=1)
y = df['Suggested Job Role']

x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.20, random_state=42
)

rf_model = RandomForestClassifier(random_state=10)
rf_model.fit(x_train, y_train)

dtree = DecisionTreeClassifier(random_state=1)
dtree = dtree.fit(x_train, y_train)


# Categorical to Numerical Mapping Functions
def map_categorical_features():
    """Return the hand-written category -> integer code tables.

    Returns:
        dict: maps a feature name to a ``{category label: int code}`` dict.
        The category labels double as the options offered by the form
        widgets in ``main``.
    """
    mappings = {
        'self-learning capability?': {
            'yes': 1,
            'NO': 0,
        },
        'Extra-courses did': {
            'yes': 1,
            'NO': 0,
        },
        'certifications': {
            'information security': 0,
            'shell programming': 1,
            'r programming': 2,
            'distro making': 3,
            'machine learning': 4,
            'full stack': 5,
            'hadoop': 6,
            'app development': 7,
            'python': 8,
        },
        'workshops': {
            'testing': 1,
            'database security': 2,
            'game development': 3,
            'data science': 4,
            'system designing': 5,
            'hacking': 6,
            'cloud computing': 7,
            'web technologies': 8,
        },
        'reading and writing skills': {
            'poor': 0,
            'excellent': 1,
            'medium': 2,
        },
        'memory capability score': {
            'poor': 0,
            'excellent': 1,
            'medium': 2,
        },
        'Interested subjects': {
            'programming': 0,
            'Management': 1,
            'data engineering': 2,
            'networks': 3,
            'Software Engineering': 4,
            'cloud computing': 5,
            'parallel computing': 6,
            'IOT': 7,
            'Computer Architecture': 8,
            'hacking': 9,
        },
        'interested career area ': {
            'testing': 0,
            'system developer': 1,
            'Business process analyst': 2,
            'security': 3,
            'developer': 4,
            'cloud computing': 5,
        },
        'Type of company want to settle in?': {
            'BPA': 0,
            'Cloud Services': 1,
            'product development': 2,
            'Testing and Maintainance Services': 3,
            'SAaS services': 4,
            'Web Services': 5,
            'Finance': 6,
            'Sales and Marketing': 7,
            'Product based': 8,
            'Service Based': 9,
        },
        'Taken inputs from seniors or elders': {
            'yes': 0,
            'no': 1,
        },
        'Interested Type of Books': {
            'Series': 0,
            'Autobiographies': 1,
            'Travel': 2,
            'Guide': 3,
            'Health': 4,
            'Journals': 5,
            'Anthology': 6,
            'Dictionaries': 7,
            'Prayer books': 8,
            'Art': 9,
            'Encyclopedias': 10,
            'Religion-Spirituality': 11,
            'Action and Adventure': 12,
            'Comics': 13,
            'Horror': 14,
            'Satire': 15,
            'Self help': 16,
            'History': 17,
            'Cookbooks': 18,
            'Math': 19,
            'Biographies': 20,
            'Drama': 21,
            'Diaries': 22,
            'Science fiction': 23,
            'Poetry': 24,
            'Romance': 25,
            'Science': 26,
            'Trilogy': 27,
            'Fantasy': 28,
            # The original entry had no value; 29 continues the sequence.
            'Childrens': 29,
        },
    }
    return mappings


def convert_categorical_to_numerical(input_data, mappings):
    """Replace categorical form values with their integer codes.

    Args:
        input_data: dict of feature name -> raw value. A value may be a
            single label, a list of labels (multiselect) or a number.
        mappings: code tables as returned by ``map_categorical_features``.

    Returns:
        dict: same keys as ``input_data``. Features that have a code table
        are converted (a list becomes the sum of its labels' codes, an
        unknown label counts as 0); all other features are passed through
        unchanged.
    """
    numerical_data = {}

    for feature, value in input_data.items():
        if feature not in mappings:
            # No code table: assume the value is already numerical.
            numerical_data[feature] = value
            continue

        codes = mappings[feature]
        if isinstance(value, list):
            # NOTE(review): summing codes is lossy (different selections can
            # give the same total) -- kept as the original behaviour.
            numerical_data[feature] = sum(codes.get(v, 0) for v in value)
        else:
            numerical_data[feature] = codes.get(value, 0)

    return numerical_data


def main():
    """Render the candidate form and show the converted input and a result."""
    st.title('Job Role Prediction System')

    # Get categorical mappings
    mappings = map_categorical_features()

    # Input form
    with st.form('job_prediction_form'):
        st.header('Candidate Profile')

        # Create columns for better layout
        col1, col2 = st.columns(2)

        with col1:
            # Categorical inputs
            self_learning = st.selectbox(
                'self-learning capability?',
                list(mappings['self-learning capability?']),
            )
            extra_courses = st.selectbox(
                'Extra-courses did',
                list(mappings['Extra-courses did']),
            )
            certifications = st.selectbox(
                'certifications',
                list(mappings['certifications']),
            )

        with col2:
            # Multiple workshop selection
            workshops = st.multiselect(
                'workshops',
                list(mappings['workshops']),
            )
            reading_writing = st.selectbox(
                'reading and writing skills',
                list(mappings['reading and writing skills']),
            )
            memory_score = st.selectbox(
                'memory capability score',
                list(mappings['memory capability score']),
            )

        # Numerical inputs
        col3, col4 = st.columns(2)

        with col3:
            # The labels previously read 'Age' and 'Technical Proficiency',
            # which did not match the features these widgets populate.
            # NOTE(review): the 18-65 range looks like a leftover from an
            # age field -- confirm the valid range for this rating.
            logical_quotient = st.number_input(
                'Logical quotient rating',
                min_value=18,
                max_value=65,
                value=25,
            )
            hackathons = st.slider('hackathons', 0, 10, 5)

        with col4:
            coding_skills = st.slider('coding skills rating', 0, 10, 5)
            public_speaking = st.number_input(
                'public speaking points',
                min_value=0,
                max_value=50,
                value=0,
            )

        # Submit button
        submit_button = st.form_submit_button('Predict Job Role')

    # Prediction logic
    if submit_button:
        # Keys must match the mapping keys exactly, otherwise the value is
        # passed through unconverted.
        input_data = {
            'self-learning capability?': self_learning,
            'Extra-courses did': extra_courses,
            'certifications': certifications,
            'workshops': workshops,
            'reading and writing skills': reading_writing,
            'memory capability score': memory_score,
            'Logical quotient rating': logical_quotient,
            'hackathons': hackathons,
            'coding skills rating': coding_skills,
            'public speaking points': public_speaking,
        }

        # Top-level UI boundary: report any failure in the page instead of
        # letting the script crash.
        try:
            # Convert categorical to numerical
            numerical_input = convert_categorical_to_numerical(
                input_data, mappings
            )

            # Display converted numerical values
            st.subheader('Converted Numerical Features')
            numerical_df = pd.DataFrame.from_dict(
                numerical_input,
                orient='index',
                columns=['Numerical Value'],
            )
            st.dataframe(numerical_df)

            # TODO: replace this mock with a real prediction from rf_model or
            # dtree. NOTE(review): before doing so, the form must supply every
            # training column, and the hand-written codes above must be
            # reconciled with the LabelEncoder codes used in training
            # (LabelEncoder numbers classes in sorted order).
            job_roles = [
                'Software Engineer',
                'Data Scientist',
                'Product Manager',
                'Cloud Architect',
                'AI Researcher',
            ]
            predicted_role = np.random.choice(job_roles)

            st.success(f'Predicted Job Role: {predicted_role}')

        except Exception as e:
            st.error(f'Error in prediction: {str(e)}')


# Sidebar for additional information
def add_sidebar():
    """Add the 'About the App' panel to the sidebar."""
    st.sidebar.header('About the App')
    # Built from unindented pieces so the Markdown renderer does not treat
    # the bullet lines as an indented code block.
    st.sidebar.info(
        'Job Role Prediction System\n\n'
        '- Converts categorical inputs to numerical\n'
        '- Provides insight into feature mapping\n'
        '- Suggests potential job roles\n'
    )


if __name__ == '__main__':
    add_sidebar()
    main()