{ "cells": [ { "cell_type": "code", "execution_count": 20, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "lv2nI8oDZBwx", "outputId": "a0783fd9-bab8-4f2d-a35c-2ec12ea39da4" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Requirement already satisfied: sqlalchemy in /usr/local/lib/python3.12/dist-packages (2.0.44)\n", "Requirement already satisfied: psycopg2 in /usr/local/lib/python3.12/dist-packages (2.9.11)\n", "Requirement already satisfied: pandas in /usr/local/lib/python3.12/dist-packages (2.2.2)\n", "Requirement already satisfied: greenlet>=1 in /usr/local/lib/python3.12/dist-packages (from sqlalchemy) (3.2.4)\n", "Requirement already satisfied: typing-extensions>=4.6.0 in /usr/local/lib/python3.12/dist-packages (from sqlalchemy) (4.15.0)\n", "Requirement already satisfied: numpy>=1.26.0 in /usr/local/lib/python3.12/dist-packages (from pandas) (2.0.2)\n", "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.12/dist-packages (from pandas) (2.9.0.post0)\n", "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.12/dist-packages (from pandas) (2025.2)\n", "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.12/dist-packages (from pandas) (2025.2)\n", "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.12/dist-packages (from python-dateutil>=2.8.2->pandas) (1.17.0)\n" ] } ], "source": [ "# %pip installs into the running kernel's environment; versions are pinned to the ones this notebook was last run with.\n", "%pip install sqlalchemy==2.0.44 psycopg2==2.9.11 pandas==2.2.2" ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 360 }, "id": "4jJ55ColUV5d", "outputId": "df044c10-364c-4c77-ea53-f3d257f213db" }, "outputs": [ { "data": { "application/vnd.google.colaboratory.intrinsic+json": { "summary": "{\n \"name\": \"data\",\n \"rows\": 15000,\n \"fields\": [\n {\n \"column\": \"job_id\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 15000,\n \"samples\": [\n \"AI11500\",\n \"AI06476\",\n 
\"AI13168\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"job_title\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 20,\n \"samples\": [\n \"AI Research Scientist\",\n \"Data Scientist\",\n \"Head of AI\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"salary_usd\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 60260,\n \"min\": 32519,\n \"max\": 399095,\n \"num_unique_values\": 14315,\n \"samples\": [\n 121638,\n 125960,\n 47012\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"salary_currency\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"USD\",\n \"EUR\",\n \"GBP\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"experience_level\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"EN\",\n \"EX\",\n \"SE\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"employment_type\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"FL\",\n \"FT\",\n \"CT\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"company_location\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 20,\n \"samples\": [\n \"China\",\n \"Denmark\",\n \"Australia\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"company_size\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"M\",\n \"L\",\n \"S\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"employee_residence\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 20,\n \"samples\": [\n \"China\",\n \"Japan\",\n \"Switzerland\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n 
{\n \"column\": \"remote_ratio\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 40,\n \"min\": 0,\n \"max\": 100,\n \"num_unique_values\": 3,\n \"samples\": [\n 50,\n 100,\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"required_skills\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 13663,\n \"samples\": [\n \"Spark, Python, Hadoop\",\n \"PyTorch, Kubernetes, Computer Vision, Docker\",\n \"Git, Hadoop, Java, PyTorch\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"education_required\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"Master\",\n \"PhD\",\n \"Bachelor\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"years_experience\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 5,\n \"min\": 0,\n \"max\": 19,\n \"num_unique_values\": 20,\n \"samples\": [\n 9,\n 13,\n 19\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"industry\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 15,\n \"samples\": [\n \"Energy\",\n \"Real Estate\",\n \"Automotive\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"posting_date\",\n \"properties\": {\n \"dtype\": \"object\",\n \"num_unique_values\": 486,\n \"samples\": [\n \"2024-12-01\",\n \"2024-08-15\",\n \"2024-04-16\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"application_deadline\",\n \"properties\": {\n \"dtype\": \"object\",\n \"num_unique_values\": 543,\n \"samples\": [\n \"2025-07-06\",\n \"2025-07-01\",\n \"2024-03-18\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"job_description_length\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 576,\n \"min\": 500,\n \"max\": 2499,\n \"num_unique_values\": 2000,\n \"samples\": [\n 1520,\n 
571,\n 1113\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"benefits_score\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1.4508696496026827,\n \"min\": 5.0,\n \"max\": 10.0,\n \"num_unique_values\": 51,\n \"samples\": [\n 8.4,\n 9.5,\n 8.3\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"company_name\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 16,\n \"samples\": [\n \"Smart Analytics\",\n \"TechCorp Inc\",\n \"Neural Networks Co\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}", "type": "dataframe", "variable_name": "data" }, "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
job_idjob_titlesalary_usdsalary_currencyexperience_levelemployment_typecompany_locationcompany_sizeemployee_residenceremote_ratiorequired_skillseducation_requiredyears_experienceindustryposting_dateapplication_deadlinejob_description_lengthbenefits_scorecompany_name
0AI00001AI Research Scientist90376USDSECTChinaMChina50Tableau, PyTorch, Kubernetes, Linux, NLPBachelor9Automotive2024-10-182024-11-0710765.9Smart Analytics
1AI00002AI Software Engineer61895USDENCTCanadaMIreland100Deep Learning, AWS, Mathematics, Python, DockerMaster1Media2024-11-202025-01-1112685.2TechCorp Inc
2AI00003AI Specialist152626USDMIFLSwitzerlandLSouth Korea0Kubernetes, Deep Learning, Java, Hadoop, NLPAssociate2Education2025-03-182025-04-0719749.4Autonomous Tech
3AI00004NLP Engineer80215USDSEFLIndiaMIndia50Scala, SQL, Linux, PythonPhD7Consulting2024-12-232025-02-2413458.6Future Systems
4AI00005AI Consultant54624EURENPTFranceSSingapore100MLOps, Java, Tableau, PythonMaster0Media2025-04-152025-06-2319896.6Advanced Robotics
\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "\n", "
\n", "
\n" ], "text/plain": [ " job_id job_title salary_usd salary_currency \\\n", "0 AI00001 AI Research Scientist 90376 USD \n", "1 AI00002 AI Software Engineer 61895 USD \n", "2 AI00003 AI Specialist 152626 USD \n", "3 AI00004 NLP Engineer 80215 USD \n", "4 AI00005 AI Consultant 54624 EUR \n", "\n", " experience_level employment_type company_location company_size \\\n", "0 SE CT China M \n", "1 EN CT Canada M \n", "2 MI FL Switzerland L \n", "3 SE FL India M \n", "4 EN PT France S \n", "\n", " employee_residence remote_ratio \\\n", "0 China 50 \n", "1 Ireland 100 \n", "2 South Korea 0 \n", "3 India 50 \n", "4 Singapore 100 \n", "\n", " required_skills education_required \\\n", "0 Tableau, PyTorch, Kubernetes, Linux, NLP Bachelor \n", "1 Deep Learning, AWS, Mathematics, Python, Docker Master \n", "2 Kubernetes, Deep Learning, Java, Hadoop, NLP Associate \n", "3 Scala, SQL, Linux, Python PhD \n", "4 MLOps, Java, Tableau, Python Master \n", "\n", " years_experience industry posting_date application_deadline \\\n", "0 9 Automotive 2024-10-18 2024-11-07 \n", "1 1 Media 2024-11-20 2025-01-11 \n", "2 2 Education 2025-03-18 2025-04-07 \n", "3 7 Consulting 2024-12-23 2025-02-24 \n", "4 0 Media 2025-04-15 2025-06-23 \n", "\n", " job_description_length benefits_score company_name \n", "0 1076 5.9 Smart Analytics \n", "1 1268 5.2 TechCorp Inc \n", "2 1974 9.4 Autonomous Tech \n", "3 1345 8.6 Future Systems \n", "4 1989 6.6 Advanced Robotics " ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pandas as pd\n", "data = pd.read_csv(\"/content/ai_job_dataset.csv\")\n", "data.head()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "EGq5wykmYr37" }, "outputs": [], "source": [ "from sqlalchemy import create_engine\n", "import os\n", "\n", "# Keep credentials out of the notebook: the connection string is read from the environment.\n", "connection_string = os.environ.get(\"POSTGRE_CONNECTION_STRING\")\n", "if not connection_string:\n", "    # os.environ.get returns None when the variable is unset; stop here with an\n", "    # actionable message instead of handing None to create_engine.\n", "    raise RuntimeError(\n", "        \"POSTGRE_CONNECTION_STRING is not set; export it before running this cell.\"\n", "    )\n", "engine = create_engine(connection_string)" ] }, { "cell_type": "code", "execution_count": 23, "metadata": { "colab": { "base_uri": 
"https://localhost:8080/" }, "id": "_UpDDKFbYv8q", "outputId": "e3f3f43e-1f77-4a24-b1a8-7db41ae75d76" }, "outputs": [ { "data": { "text/plain": [ "1000" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# pandas defaults to if_exists=\"fail\", which raises ValueError once the table exists and\n", "# breaks Restart & Run All. \"replace\" drops and recreates the table from the current frame.\n", "data.to_sql(\"sampleTable\", con=engine, if_exists=\"replace\")" ] } ], "metadata": { "colab": { "provenance": [] }, "kernelspec": { "display_name": "Python 3", "name": "python3" }, "language_info": { "name": "python" } }, "nbformat": 4, "nbformat_minor": 0 }