{ "cells": [ { "cell_type": "code", "execution_count": 20, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "lv2nI8oDZBwx", "outputId": "a0783fd9-bab8-4f2d-a35c-2ec12ea39da4" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Requirement already satisfied: sqlalchemy in /usr/local/lib/python3.12/dist-packages (2.0.44)\n", "Requirement already satisfied: psycopg2 in /usr/local/lib/python3.12/dist-packages (2.9.11)\n", "Requirement already satisfied: pandas in /usr/local/lib/python3.12/dist-packages (2.2.2)\n", "Requirement already satisfied: greenlet>=1 in /usr/local/lib/python3.12/dist-packages (from sqlalchemy) (3.2.4)\n", "Requirement already satisfied: typing-extensions>=4.6.0 in /usr/local/lib/python3.12/dist-packages (from sqlalchemy) (4.15.0)\n", "Requirement already satisfied: numpy>=1.26.0 in /usr/local/lib/python3.12/dist-packages (from pandas) (2.0.2)\n", "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.12/dist-packages (from pandas) (2.9.0.post0)\n", "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.12/dist-packages (from pandas) (2025.2)\n", "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.12/dist-packages (from pandas) (2025.2)\n", "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.12/dist-packages (from python-dateutil>=2.8.2->pandas) (1.17.0)\n" ] } ], "source": [ "!pip install sqlalchemy psycopg2 pandas" ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 360 }, "id": "4jJ55ColUV5d", "outputId": "df044c10-364c-4c77-ea53-f3d257f213db" }, "outputs": [ { "data": { "application/vnd.google.colaboratory.intrinsic+json": { "summary": "{\n \"name\": \"data\",\n \"rows\": 15000,\n \"fields\": [\n {\n \"column\": \"job_id\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 15000,\n \"samples\": [\n \"AI11500\",\n \"AI06476\",\n \"AI13168\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"job_title\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 20,\n \"samples\": [\n \"AI Research Scientist\",\n \"Data Scientist\",\n \"Head of AI\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"salary_usd\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 60260,\n \"min\": 32519,\n \"max\": 399095,\n \"num_unique_values\": 14315,\n \"samples\": [\n 121638,\n 125960,\n 47012\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"salary_currency\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"USD\",\n \"EUR\",\n \"GBP\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"experience_level\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"EN\",\n \"EX\",\n \"SE\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"employment_type\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"FL\",\n \"FT\",\n \"CT\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"company_location\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 20,\n \"samples\": [\n \"China\",\n \"Denmark\",\n \"Australia\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"company_size\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"M\",\n \"L\",\n \"S\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"employee_residence\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 20,\n \"samples\": [\n \"China\",\n \"Japan\",\n \"Switzerland\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"remote_ratio\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 40,\n \"min\": 0,\n \"max\": 100,\n \"num_unique_values\": 3,\n \"samples\": [\n 50,\n 100,\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"required_skills\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 13663,\n \"samples\": [\n \"Spark, Python, Hadoop\",\n \"PyTorch, Kubernetes, Computer Vision, Docker\",\n \"Git, Hadoop, Java, PyTorch\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"education_required\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"Master\",\n \"PhD\",\n \"Bachelor\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"years_experience\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 5,\n \"min\": 0,\n \"max\": 19,\n \"num_unique_values\": 20,\n \"samples\": [\n 9,\n 13,\n 19\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"industry\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 15,\n \"samples\": [\n \"Energy\",\n \"Real Estate\",\n \"Automotive\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"posting_date\",\n \"properties\": {\n \"dtype\": \"object\",\n \"num_unique_values\": 486,\n \"samples\": [\n \"2024-12-01\",\n \"2024-08-15\",\n \"2024-04-16\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"application_deadline\",\n \"properties\": {\n \"dtype\": \"object\",\n \"num_unique_values\": 543,\n \"samples\": [\n \"2025-07-06\",\n \"2025-07-01\",\n \"2024-03-18\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"job_description_length\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 576,\n \"min\": 500,\n \"max\": 2499,\n \"num_unique_values\": 2000,\n \"samples\": [\n 1520,\n 571,\n 1113\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"benefits_score\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1.4508696496026827,\n \"min\": 5.0,\n \"max\": 10.0,\n \"num_unique_values\": 51,\n \"samples\": [\n 8.4,\n 9.5,\n 8.3\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"company_name\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 16,\n \"samples\": [\n \"Smart Analytics\",\n \"TechCorp Inc\",\n \"Neural Networks Co\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}", "type": "dataframe", "variable_name": "data" }, "text/html": [ "\n", "
| \n", " | job_id | \n", "job_title | \n", "salary_usd | \n", "salary_currency | \n", "experience_level | \n", "employment_type | \n", "company_location | \n", "company_size | \n", "employee_residence | \n", "remote_ratio | \n", "required_skills | \n", "education_required | \n", "years_experience | \n", "industry | \n", "posting_date | \n", "application_deadline | \n", "job_description_length | \n", "benefits_score | \n", "company_name | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "AI00001 | \n", "AI Research Scientist | \n", "90376 | \n", "USD | \n", "SE | \n", "CT | \n", "China | \n", "M | \n", "China | \n", "50 | \n", "Tableau, PyTorch, Kubernetes, Linux, NLP | \n", "Bachelor | \n", "9 | \n", "Automotive | \n", "2024-10-18 | \n", "2024-11-07 | \n", "1076 | \n", "5.9 | \n", "Smart Analytics | \n", "
| 1 | \n", "AI00002 | \n", "AI Software Engineer | \n", "61895 | \n", "USD | \n", "EN | \n", "CT | \n", "Canada | \n", "M | \n", "Ireland | \n", "100 | \n", "Deep Learning, AWS, Mathematics, Python, Docker | \n", "Master | \n", "1 | \n", "Media | \n", "2024-11-20 | \n", "2025-01-11 | \n", "1268 | \n", "5.2 | \n", "TechCorp Inc | \n", "
| 2 | \n", "AI00003 | \n", "AI Specialist | \n", "152626 | \n", "USD | \n", "MI | \n", "FL | \n", "Switzerland | \n", "L | \n", "South Korea | \n", "0 | \n", "Kubernetes, Deep Learning, Java, Hadoop, NLP | \n", "Associate | \n", "2 | \n", "Education | \n", "2025-03-18 | \n", "2025-04-07 | \n", "1974 | \n", "9.4 | \n", "Autonomous Tech | \n", "
| 3 | \n", "AI00004 | \n", "NLP Engineer | \n", "80215 | \n", "USD | \n", "SE | \n", "FL | \n", "India | \n", "M | \n", "India | \n", "50 | \n", "Scala, SQL, Linux, Python | \n", "PhD | \n", "7 | \n", "Consulting | \n", "2024-12-23 | \n", "2025-02-24 | \n", "1345 | \n", "8.6 | \n", "Future Systems | \n", "
| 4 | \n", "AI00005 | \n", "AI Consultant | \n", "54624 | \n", "EUR | \n", "EN | \n", "PT | \n", "France | \n", "S | \n", "Singapore | \n", "100 | \n", "MLOps, Java, Tableau, Python | \n", "Master | \n", "0 | \n", "Media | \n", "2025-04-15 | \n", "2025-06-23 | \n", "1989 | \n", "6.6 | \n", "Advanced Robotics | \n", "