# Hugging Face Space: ArchiMathur / project (status: Sleeping)
# File size: 20,353 Bytes

import gradio as gr
import pandas as pd 
import numpy as np 
import pickle
import sklearn
from datasets import load_dataset
import joblib
import requests

# Read the survey dataset at import time. The categorical columns of this
# frame are converted to integer codes further down, and the label -> code
# lookup tables used by the prediction handler are derived from it.
data = pd.read_csv("mldata.csv")

# Function to load model
def load_model():
    """Return the object unpickled from ``rfweights.pkl``.

    The path is relative, so the file is looked up in the current working
    directory.

    NOTE(review): ``pickle.load`` can execute code embedded in the file, so
    ``rfweights.pkl`` must only ever come from a trusted source.
    """
    with open('rfweights.pkl', 'rb') as weights_file:
        model = pickle.load(weights_file)
    return model

# Keep a text snapshot of the categorical columns before `data` is overwritten
# with integer codes; the snapshot is what the label -> code lookups read from.
# (The trailing space in 'interested career area ' is part of the column name
# as written here; presumably it matches the CSV header -- verify.)
categorical_cols = data.loc[:, [
    'certifications',
    'workshops',
    'Interested subjects',
    'interested career area ',
    'Type of company want to settle in?',
    'Interested Type of Books',
]].copy()

# Replace every categorical column in `data` with its pandas category codes.
for column_name in categorical_cols.columns:
    data[column_name] = data[column_name].astype('category').cat.codes

# Create reference dictionaries for embeddings
def create_embedding_dict(column):
    """Build a ``{label: code}`` lookup for one categorical column.

    Labels are read from the text snapshot ``categorical_cols`` and codes from
    the already-encoded ``data``. Both sequences are the distinct values in
    order of first appearance, so the i-th label is paired with the i-th code.

    Args:
        column: Name of a column present in both ``categorical_cols`` and
            ``data``.

    Returns:
        A dict mapping each distinct label to its integer category code.
    """
    labels = list(categorical_cols[column].unique())
    codes = list(data[column].unique())
    return {label: code for label, code in zip(labels, codes)}

# Label -> code lookup tables, one per categorical survey column, built in the
# same order as the column names listed below.
(
    certificates_references,
    workshop_references,
    subjects_interest_references,
    career_interest_references,
    company_intends_references,
    book_interest_references,
) = [
    create_embedding_dict(source_column)
    for source_column in (
        'certifications',
        'workshops',
        'Interested subjects',
        'interested career area ',
        'Type of company want to settle in?',
        'Interested Type of Books',
    )
]

# Career-specific job data.
# Curated listings keyed by career name; used as the fallback when the live
# job-search API yields nothing. Every row is
# [job title, company, location, salary range], i.e. the same column order as
# the "Job Suggestions" table shown in the UI.
CAREER_JOB_DATA = {
    "Software Engineer": [
        ["Software Engineer", "Mindtree Ltd", "Bangalore, Karnataka", "₹5,50,000 - ₹11,00,000"],
        ["Software Developer", "Mphasis", "Pune, Maharashtra", "₹5,00,000 - ₹9,50,000"],
        ["Full Stack Developer", "Persistent Systems", "Hyderabad, Telangana", "₹6,00,000 - ₹12,00,000"],
        ["Backend Engineer", "Zensar Technologies", "Mumbai, Maharashtra", "₹5,80,000 - ₹10,50,000"],
        ["Junior Software Engineer", "Cyient", "Chennai, Tamil Nadu", "₹4,20,000 - ₹7,80,000"]
    ],
    "Software Developer": [
        ["Software Developer", "LTI (L&T Infotech)", "Bangalore, Karnataka", "₹4,80,000 - ₹9,20,000"],
        ["Application Developer", "Hexaware Technologies", "Hyderabad, Telangana", "₹5,20,000 - ₹9,80,000"],
        ["Java Developer", "Birlasoft", "Pune, Maharashtra", "₹5,50,000 - ₹10,50,000"],
        ["Python Developer", "Sonata Software", "Noida, UP", "₹6,00,000 - ₹11,50,000"],
        ["Software Engineer Trainee", "Larsen & Toubro Technology", "Mumbai, Maharashtra", "₹3,80,000 - ₹6,50,000"]
    ],
    "Web Developer": [
        ["Frontend Developer", "Nagarro", "Gurgaon, Haryana", "₹6,50,000 - ₹13,00,000"],
        ["Full Stack Web Developer", "Publicis Sapient", "Bangalore, Karnataka", "₹7,20,000 - ₹14,50,000"],
        ["React Developer", "ThoughtWorks", "Pune, Maharashtra", "₹8,00,000 - ₹16,00,000"],
        ["Web Developer", "Xoriant", "Mumbai, Maharashtra", "₹5,50,000 - ₹11,00,000"],
        ["UI Developer", "Synechron", "Bangalore, Karnataka", "₹6,80,000 - ₹13,50,000"]
    ],
    "Mobile Applications Developer": [
        ["Android Developer", "Mindtree Ltd", "Bangalore, Karnataka", "₹7,50,000 - ₹15,00,000"],
        ["iOS Developer", "Cybage", "Pune, Maharashtra", "₹7,00,000 - ₹14,00,000"],
        ["Flutter Developer", "QuEST Global", "Bangalore, Karnataka", "₹6,50,000 - ₹13,00,000"],
        ["React Native Developer", "NIIT Technologies", "Noida, UP", "₹6,00,000 - ₹12,00,000"],
        ["Mobile App Developer", "iGate (Capgemini)", "Hyderabad, Telangana", "₹5,80,000 - ₹11,50,000"]
    ],
    "Database Developer": [
        ["Database Developer", "Mastek", "Mumbai, Maharashtra", "₹6,50,000 - ₹13,00,000"],
        ["SQL Developer", "Virtusa", "Hyderabad, Telangana", "₹7,00,000 - ₹14,00,000"],
        ["Database Administrator", "Polaris Consulting", "Chennai, Tamil Nadu", "₹6,20,000 - ₹12,50,000"],
        ["Data Engineer", "Altimetrik", "Bangalore, Karnataka", "₹7,50,000 - ₹15,00,000"],
        ["Big Data Developer", "Sasken Technologies", "Bangalore, Karnataka", "₹7,80,000 - ₹15,50,000"]
    ],
    "Network Security Engineer": [
        ["Security Engineer", "Quick Heal Technologies", "Pune, Maharashtra", "₹6,50,000 - ₹13,00,000"],
        ["Cybersecurity Analyst", "Paladion Networks", "Bangalore, Karnataka", "₹6,00,000 - ₹12,00,000"],
        ["Network Security Specialist", "K7 Computing", "Chennai, Tamil Nadu", "₹7,00,000 - ₹14,00,000"],
        ["Information Security Analyst", "SecureKloud", "Chennai, Tamil Nadu", "₹6,80,000 - ₹13,50,000"],
        ["Security Operations Analyst", "Sequretek", "Bangalore, Karnataka", "₹5,80,000 - ₹11,50,000"]
    ],
    "UX Designer": [
        ["UX Designer", "Think Design", "Bangalore, Karnataka", "₹5,50,000 - ₹12,00,000"],
        ["UI/UX Designer", "F5 Studio", "Mumbai, Maharashtra", "₹5,00,000 - ₹11,00,000"],
        ["Product Designer", "Lollypop Design", "Bangalore, Karnataka", "₹6,00,000 - ₹13,00,000"],
        ["Visual Designer", "Designit (Wipro)", "Pune, Maharashtra", "₹5,80,000 - ₹12,50,000"],
        ["UX Researcher", "Happy Marketer", "Gurgaon, Haryana", "₹5,20,000 - ₹11,50,000"]
    ],
    "Software Quality Assurance (QA)/ Testing": [
        ["QA Engineer", "Cigniti Technologies", "Hyderabad, Telangana", "₹4,20,000 - ₹8,50,000"],
        ["Software Tester", "TestingXperts", "Mumbai, Maharashtra", "₹3,80,000 - ₹7,80,000"],
        ["Automation Test Engineer", "Qualitest", "Pune, Maharashtra", "₹5,00,000 - ₹10,00,000"],
        ["QA Analyst", "QA InfoTech", "Noida, UP", "₹4,50,000 - ₹9,00,000"],
        ["Test Lead", "Maveric Systems", "Bangalore, Karnataka", "₹6,50,000 - ₹13,00,000"]
    ],
    "Technical Support": [
        ["Technical Support Engineer", "Happiest Minds", "Bangalore, Karnataka", "₹3,20,000 - ₹6,50,000"],
        ["IT Support Specialist", "Rolta India", "Mumbai, Maharashtra", "₹2,80,000 - ₹5,80,000"],
        ["Desktop Support Engineer", "Fujitsu Consulting", "Pune, Maharashtra", "₹3,00,000 - ₹6,00,000"],
        ["Technical Support Associate", "iYogi Technical Services", "Gurgaon, Haryana", "₹3,50,000 - ₹7,00,000"],
        ["Help Desk Technician", "CSS Corp", "Chennai, Tamil Nadu", "₹2,80,000 - ₹5,50,000"]
    ],
    "Systems Security Administrator": [
        ["System Administrator", "Kale Logistics", "Pune, Maharashtra", "₹4,50,000 - ₹9,00,000"],
        ["Linux Administrator", "Sify Technologies", "Chennai, Tamil Nadu", "₹5,50,000 - ₹11,00,000"],
        ["Windows System Admin", "Netmagic Solutions", "Mumbai, Maharashtra", "₹5,20,000 - ₹10,50,000"],
        ["Cloud Administrator", "CtrlS Datacenters", "Hyderabad, Telangana", "₹6,50,000 - ₹13,00,000"],
        ["DevOps Engineer", "Genpact", "Bangalore, Karnataka", "₹7,00,000 - ₹14,00,000"]
    ],
    "Applications Developer": [
        ["Application Developer", "3i Infotech", "Mumbai, Maharashtra", "₹5,50,000 - ₹11,00,000"],
        ["Enterprise App Developer", "Ramco Systems", "Chennai, Tamil Nadu", "₹6,20,000 - ₹12,50,000"],
        ["Software Application Engineer", "Newgen Software", "Noida, UP", "₹6,50,000 - ₹13,00,000"],
        ["Business Application Developer", "Aurionpro Solutions", "Mumbai, Maharashtra", "₹5,80,000 - ₹11,50,000"],
        ["Custom App Developer", "Nucleus Software", "Noida, UP", "₹6,00,000 - ₹12,00,000"]
    ],
    "CRM Technical Developer": [
        ["Salesforce Developer", "Tech Mahindra", "Pune, Maharashtra", "₹6,50,000 - ₹13,00,000"],
        ["CRM Developer", "HGS (Hinduja Global)", "Bangalore, Karnataka", "₹6,00,000 - ₹12,00,000"],
        ["Dynamics 365 Developer", "L&T Technology Services", "Vadodara, Gujarat", "₹6,80,000 - ₹13,50,000"],
        ["CRM Technical Consultant", "Firstsource Solutions", "Mumbai, Maharashtra", "₹6,20,000 - ₹12,50,000"],
        ["Salesforce Administrator", "WNS Global Services", "Pune, Maharashtra", "₹5,00,000 - ₹10,00,000"]
    ]
}

# Function to fetch job listings
def _format_salary(job):
    """Render the salary fields of one JSearch job record as display text."""
    min_sal = job.get('job_min_salary')
    max_sal = job.get('job_max_salary')
    if not min_sal:
        # No usable lower bound (a lone maximum is also reported this way).
        return "Not specified"

    # A null currency is treated like a missing one.
    currency = job.get('job_salary_currency') or 'INR'
    try:
        min_sal = float(min_sal)
        max_sal = float(max_sal) if max_sal else None
    except (TypeError, ValueError):
        # A non-numeric salary must not discard the whole listing.
        return "Not specified"

    if max_sal is not None:
        if currency == 'INR':
            return f"₹{min_sal:,.0f} - ₹{max_sal:,.0f}"
        return f"{currency} {min_sal:,.0f} - {max_sal:,.0f}"
    if currency == 'INR':
        return f"₹{min_sal:,.0f}+"
    return f"{currency} {min_sal:,.0f}+"


def _format_location(job):
    """Join city and state of one job record, falling back to its country."""
    parts = [str(job[key]) for key in ('job_city', 'job_state') if job.get(key)]
    if parts:
        return ', '.join(parts)
    return job.get('job_country') or 'India'


def fetch_job_listings(job_title):
    """Fetch job listings - tries API first, then falls back to curated data.

    The JSearch (RapidAPI) key is read from the ``RAPIDAPI_KEY`` environment
    variable instead of being embedded in the source. When the variable is
    unset the API call is skipped and the curated data is used directly.
    NOTE(review): the key that used to be hard-coded here is exposed in the
    file history and should be revoked.

    Args:
        job_title: Career name to search for; also the key looked up in
            ``CAREER_JOB_DATA``.

    Returns:
        A list of up to five ``[job title, company, location, salary]`` rows.
        Sources are tried in order: live API results, the curated entry for
        ``job_title``, then a generic template built from ``job_title``.
    """
    import os  # local import keeps this block independent of the file header

    api_key = os.environ.get("RAPIDAPI_KEY")
    if api_key:
        url = "https://jsearch.p.rapidapi.com/search"
        querystring = {
            "query": f"{job_title} in India",
            "page": "1",
            "num_pages": "1",
            "date_posted": "all"
        }
        headers = {
            "x-rapidapi-key": api_key,
            "x-rapidapi-host": "jsearch.p.rapidapi.com"
        }

        # The API is strictly best-effort: any failure (network, bad JSON,
        # unexpected payload shape) must fall through to the curated data, so
        # the broad catch is deliberate.
        try:
            response = requests.get(url, headers=headers, params=querystring, timeout=10)
            print(f"JSearch API Response Status: {response.status_code}")

            if response.status_code == 200:
                jobs = response.json().get('data') or []
                job_listings = [
                    [
                        job.get('job_title') or 'N/A',
                        job.get('employer_name') or 'N/A',
                        _format_location(job),
                        _format_salary(job),
                    ]
                    for job in jobs[:5]
                ]
                if job_listings:
                    print(f"Successfully fetched {len(job_listings)} real jobs from API")
                    return job_listings
        except Exception as e:
            print(f"API failed: {str(e)}, using curated data")
    else:
        print("RAPIDAPI_KEY is not set, using curated data")

    # Fallback to curated career-specific data
    curated = CAREER_JOB_DATA.get(job_title)
    if curated is not None:
        print(f"Using curated data for {job_title}")
        # Copy the rows so callers can never mutate the shared module table.
        return [list(row) for row in curated]

    # Generic fallback
    return [
        [f"{job_title} (Entry Level)", "Various IT Companies", "Bangalore, Karnataka", "₹4,00,000 - ₹8,00,000"],
        [f"{job_title} (Mid Level)", "Various IT Companies", "Hyderabad, Telangana", "₹7,00,000 - ₹14,00,000"],
        [f"{job_title} (Senior)", "Various IT Companies", "Pune, Maharashtra", "₹12,00,000 - ₹24,00,000"],
        [f"{job_title} Intern", "Startups & IT Firms", "Mumbai, Maharashtra", "₹2,00,000 - ₹4,00,000"],
        ["💡 Job Search", "Check: Naukri, LinkedIn, Indeed", "India (Remote/Onsite)", "Apply to 10+ daily"]
    ]

# Prediction function (modified to return job suggestions)

# Career names in the positional order used to read the model's
# ``predict_proba`` columns (column 0 -> first label, and so on).
_CAREER_LABELS = (
    "Applications Developer",
    "CRM Technical Developer",
    "Database Developer",
    "Mobile Applications Developer",
    "Network Security Engineer",
    "Software Developer",
    "Software Engineer",
    "Software Quality Assurance (QA)/ Testing",
    "Systems Security Administrator",
    "Technical Support",
    "UX Designer",
    "Web Developer",
)

# Dummy (one-hot) pairs appended to the end of the feature vector.
_MANAGEMENT_TECHNICAL_DUMMIES = {"Management": [1, 0], "Technical": [0, 1]}
_SMART_HARD_WORKER_DUMMIES = {"smart worker": [1, 0], "hard worker": [0, 1]}


def _prediction_error(message):
    """Return the (probabilities, jobs table) pair shown when prediction fails."""
    return {"Error": 1.0}, [[message, "", "", ""]]


def _skill_level(description):
    """Map a skill rating to 0 ("poor"), 1 ("medium") or 2 (anything else)."""
    if "poor" in description:
        return 0
    if "medium" in description:
        return 1
    return 2


def _encode_label(label, mapping):
    """Look up a category code; unknown labels become NaN.

    NaN is what the previous ``Series.map`` based encoding produced for a
    label missing from the lookup table, so that behaviour is kept as is.
    """
    return mapping.get(label, float("nan"))


def rfprediction(name, logical_thinking, hackathon_attend, coding_skills, public_speaking_skills,
                 self_learning, extra_course, certificate_code, worskhop_code, read_writing_skill, memory_capability,
                 subject_interest, career_interest, company_intend, senior_elder_advise, book_interest, introvert_extro,
                 team_player, management_technical, smart_hardworker):
    """Predict career probabilities from the form answers and suggest jobs.

    The answers are encoded into a single 21-value feature row: the 17 survey
    answers in parameter order (categorical labels replaced by their codes,
    the two skill ratings by 0/1/2), followed by the Management/Technical
    dummy pair and the smart/hard worker dummy pair.

    Args:
        name: Accepted for the form layout; not used in the prediction.
        logical_thinking, hackathon_attend, coding_skills,
        public_speaking_skills: Numeric ratings passed through unchanged.
        self_learning, extra_course, senior_elder_advise, introvert_extro,
        team_player: Yes/No answers, passed through as received.
        certificate_code, worskhop_code, subject_interest, career_interest,
        company_intend, book_interest: Category labels, encoded through the
            module-level ``*_references`` lookup tables.
        read_writing_skill, memory_capability: Text ratings; see
            ``_skill_level``.
        management_technical: ``"Management"`` or ``"Technical"``.
        smart_hardworker: ``"smart worker"`` or ``"hard worker"``.

    Returns:
        ``(probabilities, jobs)`` where ``probabilities`` maps each career
        name to a float and ``jobs`` is a list of 4-column rows for the most
        likely career. On any failure the pair is
        ``({"Error": 1.0}, [[message, "", "", ""]])``; nothing is raised.
    """
    # Reject unanswered questions up front. Without this, a missing answer
    # surfaced as an opaque TypeError or as a NaN/None feature in the row.
    required_answers = {
        "logical thinking": logical_thinking,
        "hackathons": hackathon_attend,
        "coding skills": coding_skills,
        "public speaking": public_speaking_skills,
        "self-learning": self_learning,
        "extra courses": extra_course,
        "certificate": certificate_code,
        "workshop": worskhop_code,
        "reading/writing skill": read_writing_skill,
        "memory capability": memory_capability,
        "subject interest": subject_interest,
        "career interest": career_interest,
        "company type": company_intend,
        "senior/elder advice": senior_elder_advise,
        "book genre": book_interest,
        "introvert": introvert_extro,
        "team work": team_player,
        "management/technical": management_technical,
        "smart/hard worker": smart_hardworker,
    }
    unanswered = [
        question for question, answer in required_answers.items()
        if answer is None or answer == ""
    ]
    if unanswered:
        return _prediction_error(
            "Please answer every question. Missing: " + ", ".join(unanswered)
        )

    # This function is the UI boundary, so every failure below is converted
    # into an error result instead of propagating.
    try:
        if management_technical not in _MANAGEMENT_TECHNICAL_DUMMIES:
            return _prediction_error("Error in Management-Technical encoding")
        if smart_hardworker not in _SMART_HARD_WORKER_DUMMIES:
            return _prediction_error("Error in Smart-Hard worker encoding")

        # Feature order must stay exactly as below; the two dummy pairs go
        # last, Management/Technical before smart/hard worker.
        features = [
            logical_thinking,
            hackathon_attend,
            coding_skills,
            public_speaking_skills,
            self_learning,
            extra_course,
            _encode_label(certificate_code, certificates_references),
            _encode_label(worskhop_code, workshop_references),
            _skill_level(read_writing_skill),
            _skill_level(memory_capability),
            _encode_label(subject_interest, subjects_interest_references),
            _encode_label(career_interest, career_interest_references),
            _encode_label(company_intend, company_intends_references),
            senior_elder_advise,
            _encode_label(book_interest, book_interest_references),
            introvert_extro,
            team_player,
            *_MANAGEMENT_TECHNICAL_DUMMIES[management_technical],
            *_SMART_HARD_WORKER_DUMMIES[smart_hardworker],
        ]

        # Load the Random Forest model and score the single-row input.
        rfmodel = load_model()
        probabilities = rfmodel.predict_proba(np.array([features]))[0]
        if len(probabilities) < len(_CAREER_LABELS):
            raise ValueError(
                f"model returned {len(probabilities)} class probabilities, "
                f"expected {len(_CAREER_LABELS)}"
            )

        # Create result dictionary with probabilities
        result_list = {
            career: float(probability)
            for career, probability in zip(_CAREER_LABELS, probabilities)
        }

        # Fetch job listings for the top predicted career
        top_career = max(result_list, key=result_list.get)
        job_suggestions = fetch_job_listings(top_career)

        return result_list, job_suggestions

    except Exception as e:
        return _prediction_error(f"Error during prediction: {str(e)}")

# Lists for dropdown menus
# The spellings are reproduced exactly as given; the categorical ones are used
# as lookup keys when the answers are encoded.
cert_list = [
    "app development",
    "distro making",
    "full stack",
    "hadoop",
    "information security",
    "machine learning",
    "python",
    "r programming",
    "shell programming",
]
workshop_list = [
    "cloud computing",
    "data science",
    "database security",
    "game development",
    "hacking",
    "system designing",
    "testing",
    "web technologies",
]
skill = [
    "excellent",
    "medium",
    "poor",
]
subject_list = [
    "cloud computing",
    "Computer Architecture",
    "data engineering",
    "hacking",
    "IOT",
    "Management",
    "networks",
    "parallel computing",
    "programming",
    "Software Engineering",
]
career_list = [
    "Business process analyst",
    "cloud computing",
    "developer",
    "security",
    "system developer",
    "testing",
]
company_list = [
    "BPA",
    "Cloud Services",
    "Finance",
    "Product based",
    "product development",
    "SAaS services",
    "Sales and Marketing",
    "Service Based",
    "Testing and Maintainance Services",
    "Web Services",
]
book_list = [
    "Action and Adventure",
    "Anthology",
    "Art",
    "Autobiographies",
    "Biographies",
    "Childrens",
    "Comics",
    "Cookbooks",
    "Diaries",
    "Dictionaries",
    "Drama",
    "Encyclopedias",
    "Fantasy",
    "Guide",
    "Health",
    "History",
    "Horror",
    "Journals",
    "Math",
    "Mystery",
    "Poetry",
    "Prayer books",
    "Religion-Spirituality",
    "Romance",
    "Satire",
    "Science",
    "Science fiction",
    "Self help",
    "Series",
    "Travel",
    "Trilogy",
]
Choice_list = [
    "Management",
    "Technical",
]
worker_list = [
    "hard worker",
    "smart worker",
]

# Create Gradio interface
def create_output_component():
    """Return the two output widgets, in the order the handler returns values.

    The first is a label for the career probabilities, the second a
    four-column table for the suggested jobs.
    """
    probabilities_view = gr.Label(label="Career Probabilities")
    jobs_table = gr.Dataframe(
        label="Job Suggestions",
        headers=["Job Title", "Company", "Location", "Salary"],
    )
    return [probabilities_view, jobs_table]

def _rating_slider(question, default):
    """Integer slider on the 1-9 scale with the given starting value."""
    return gr.Slider(minimum=1, maximum=9, value=default, step=1, label=question, info="Scale: 1 - 9")


def _yes_no_radio(question):
    """Yes/No radio that passes the index of the selected choice to the handler.

    NOTE(review): with ``type="index"`` the handler receives 0 for "Yes" and
    1 for "No" -- confirm this matches the encoding the model was trained on.
    """
    return gr.Radio(["Yes", "No"], type="index", label=question)


# One input widget per handler parameter, in the handler's parameter order.
form_inputs = [
    gr.Textbox(placeholder="What is your name?", label="Name"),
    _rating_slider("Are you a logical thinking person?", 3),
    gr.Slider(minimum=0, maximum=6, value=0, step=1, label="Do you attend any Hackathons?", info="Scale: 0 - 6 | 0 - if not attended any"),
    _rating_slider("How do you rate your coding skills?", 5),
    _rating_slider("How do you rate your public speaking skills/confidency?", 3),
    _yes_no_radio("Are you a self-learning person? *"),
    _yes_no_radio("Do you take extra courses in uni (other than IT)? *"),
    gr.Dropdown(cert_list, label="Select a certificate you took!"),
    gr.Dropdown(workshop_list, label="Select a workshop you attended!"),
    gr.Dropdown(skill, label="Select your read and writing skill"),
    gr.Dropdown(skill, label="Is your memory capability good?"),
    gr.Dropdown(subject_list, label="What subject you are interested in?"),
    gr.Dropdown(career_list, label="Which IT-Career do you have interests in?"),
    gr.Dropdown(company_list, label="Do you have any interested company that you intend to settle in?"),
    _yes_no_radio("Do you ever seek any advices from senior or elders? *"),
    gr.Dropdown(book_list, label="Select your interested genre of book!"),
    _yes_no_radio("Are you an Introvert?| No - extrovert *"),
    _yes_no_radio("Ever worked in a team? *"),
    gr.Dropdown(Choice_list, label="Which area do you prefer: Management or Technical?"),
    gr.Dropdown(worker_list, label="Are you a Smart worker or Hard worker?"),
]

demo = gr.Interface(
    fn=rfprediction,
    inputs=form_inputs,
    outputs=create_output_component(),
    title="AI-Enhanced Career Guidance System",
)

# Main execution
if __name__ == "__main__":
    demo.launch(share=False)