{ "cells": [ { "cell_type": "code", "execution_count": 43, "metadata": {}, "outputs": [], "source": [ "import faker\n", "import pandas as pd\n", "import random\n", "import sqlite3" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [], "source": [ "fake = faker.Faker()" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [], "source": [ "num_records = 250" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [], "source": [ "# Generate dataset\n", "customers_data = {\n", " \"customer_id\": range(1, num_records + 1),\n", " \"first_name\": [fake.first_name() for _ in range(num_records)],\n", " \"last_name\": [fake.last_name() for _ in range(num_records)],\n", " \"email\": [fake.email() for _ in range(num_records)],\n", " \"phone_number\": [fake.phone_number() for _ in range(num_records)],\n", " \"address\": [fake.street_address() for _ in range(num_records)],\n", " \"city\": [fake.city() for _ in range(num_records)],\n", " \"state\": [fake.state() for _ in range(num_records)],\n", " \"zip_code\": [fake.zipcode() for _ in range(num_records)],\n", " \"country\": [fake.country() for _ in range(num_records)],\n", " \"date_of_birth\": [fake.date_of_birth().strftime(\"%Y-%m-%d\") for _ in range(num_records)],\n", " \"gender\": [random.choice([\"Male\", \"Female\", \"Other\"]) for _ in range(num_records)]\n", "}\n", "\n", "customers_df = pd.DataFrame(customers_data)" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
customer_idfirst_namelast_nameemailphone_numberaddresscitystatezip_codecountrydate_of_birthgender
01DanielDayhvalencia@example.net(671)991-36682712 Matthew Course Apt. 519ReginashireVirginia43739Portugal1955-02-11Male
12LucasJimenezjennifer95@example.org694.215.1833560 Victoria Shoals Apt. 465MarshallmouthOklahoma90653Albania1909-06-06Female
23VictoriaWillismillersean@example.org769-267-344558325 Buck Road Suite 830South PamelaboroughOregon73729Lithuania1925-09-12Other
34AustinCarrarnoldjennifer@example.com874-821-2653x3698601855 Peterson View Apt. 956PottertonWyoming80500Dominica1920-06-23Other
45EthanMartinmark46@example.org875-454-9228617 Clayton TunnelAdamsportMichigan38936Yemen1985-03-13Female
\n", "
" ], "text/plain": [ " customer_id first_name last_name email \\\n", "0 1 Daniel Day hvalencia@example.net \n", "1 2 Lucas Jimenez jennifer95@example.org \n", "2 3 Victoria Willis millersean@example.org \n", "3 4 Austin Carr arnoldjennifer@example.com \n", "4 5 Ethan Martin mark46@example.org \n", "\n", " phone_number address city \\\n", "0 (671)991-3668 2712 Matthew Course Apt. 519 Reginashire \n", "1 694.215.1833 560 Victoria Shoals Apt. 465 Marshallmouth \n", "2 769-267-3445 58325 Buck Road Suite 830 South Pamelaborough \n", "3 874-821-2653x36986 01855 Peterson View Apt. 956 Potterton \n", "4 875-454-9228 617 Clayton Tunnel Adamsport \n", "\n", " state zip_code country date_of_birth gender \n", "0 Virginia 43739 Portugal 1955-02-11 Male \n", "1 Oklahoma 90653 Albania 1909-06-06 Female \n", "2 Oregon 73729 Lithuania 1925-09-12 Other \n", "3 Wyoming 80500 Dominica 1920-06-23 Other \n", "4 Michigan 38936 Yemen 1985-03-13 Female " ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "customers_df.head()" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [], "source": [ "# Set seed for reproducibility\n", "random.seed(42)\n", "\n", "# Define number of records\n", "num_records = 1000\n", "\n", "# Generate dataset\n", "items_data = {\n", " \"id\": range(1, num_records + 1),\n", " \"product_name\": [fake.catch_phrase() for _ in range(num_records)],\n", " \"description\": [fake.paragraph(nb_sentences=3) for _ in range(num_records)],\n", " \"price\": [round(random.uniform(10.0, 100.0), 2) for _ in range(num_records)],\n", " \"category\": [random.choice([\"Electronics\", \"Fashion\", \"Home Goods\", \"Sports\", \"Toys\"]) for _ in range(num_records)],\n", " \"sub_category\": [\n", " random.choice([\n", " \"Smartphones\", \"Laptops\", \"Tablets\",\n", " \"Women's Clothing\", \"Men's Clothing\", \"Kids' Clothing\",\n", " \"Kitchen Appliances\", \"Home Decor\", \"Furniture\",\n", " \"Fitness Equipment\", \"Outdoor Gear\", \"Toys & Games\"\n", " ]) for _ in range(num_records)\n", " ],\n", " \"brand\": [fake.company() for _ in range(num_records)],\n", " \"rating\": [round(random.uniform(1.0, 5.0), 1) for _ in range(num_records)],\n", " \"num_reviews\": [random.randint(1, 100) for _ in range(num_records)],\n", " \"stock_quantity\": [random.randint(1, 100) for _ in range(num_records)],\n", " \"seller_name\": [fake.name() for _ in range(num_records)],\n", " \"shipping_weight\": [round(random.uniform(1.0, 10.0), 2) for _ in range(num_records)],\n", " \"shipping_dimension\": [\n", " f\"{random.randint(6, 20)} x {random.randint(4, 12)} x {random.randint(2, 8)}\"\n", " for _ in range(num_records)\n", " ]\n", "}\n", "\n", "items_df = pd.DataFrame(items_data)" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idproduct_namedescriptionpricecategorysub_categorybrandratingnum_reviewsstock_quantityseller_nameshipping_weightshipping_dimension
01Ergonomic bottom-line frameworkKind stay kid song dream. Yourself would scene...67.55ElectronicsMen's ClothingGonzalez, Jones and Hanson3.2529Kathryn Hansen2.3118 x 6 x 6
12Reduced high-level customer loyaltyNothing free around expert decade. Great view ...12.25Home GoodsToys & GamesWalker-Love1.75234Breanna Allison1.4014 x 7 x 2
23Phased holistic capacityFire usually high manage tend available.34.75ToysLaptopsNelson-Morrison2.85929Allen Hernandez8.3612 x 12 x 5
34Quality-focused 6thgeneration matrixCapital onto into eat unit church take ground....30.09Home GoodsKids' ClothingSullivan, Clark and Larson4.03548Joseph Hayden2.8019 x 7 x 6
45Visionary systemic arrayWoman former wind bill red authority. Police s...76.28ElectronicsHome DecorEvans PLC4.15011John Mcdowell4.3613 x 11 x 4
\n", "
" ], "text/plain": [ " id product_name \\\n", "0 1 Ergonomic bottom-line framework \n", "1 2 Reduced high-level customer loyalty \n", "2 3 Phased holistic capacity \n", "3 4 Quality-focused 6thgeneration matrix \n", "4 5 Visionary systemic array \n", "\n", " description price category \\\n", "0 Kind stay kid song dream. Yourself would scene... 67.55 Electronics \n", "1 Nothing free around expert decade. Great view ... 12.25 Home Goods \n", "2 Fire usually high manage tend available. 34.75 Toys \n", "3 Capital onto into eat unit church take ground.... 30.09 Home Goods \n", "4 Woman former wind bill red authority. Police s... 76.28 Electronics \n", "\n", " sub_category brand rating num_reviews \\\n", "0 Men's Clothing Gonzalez, Jones and Hanson 3.2 52 \n", "1 Toys & Games Walker-Love 1.7 52 \n", "2 Laptops Nelson-Morrison 2.8 59 \n", "3 Kids' Clothing Sullivan, Clark and Larson 4.0 35 \n", "4 Home Decor Evans PLC 4.1 50 \n", "\n", " stock_quantity seller_name shipping_weight shipping_dimension \n", "0 9 Kathryn Hansen 2.31 18 x 6 x 6 \n", "1 34 Breanna Allison 1.40 14 x 7 x 2 \n", "2 29 Allen Hernandez 8.36 12 x 12 x 5 \n", "3 48 Joseph Hayden 2.80 19 x 7 x 6 \n", "4 11 John Mcdowell 4.36 13 x 11 x 4 " ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "items_df.head()" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [], "source": [ "# Define number of orders\n", "num_orders = 5000\n", "\n", "# Generate orders dataset\n", "data = {\n", " \"order_id\": range(1, num_orders + 1),\n", " \"customer_id\": [random.choice(customers_df[\"customer_id\"]) for _ in range(num_orders)],\n", " \"product_id\": [random.choice(items_df[\"id\"]) for _ in range(num_orders)],\n", " \"order_date\": [fake.date_time_between(start_date=\"-2y\", end_date=\"now\").strftime(\"%Y-%m-%d %H:%M:%S\") for _ in range(num_orders)],\n", " \"order_status\": [random.choice([\"Pending\", \"Shipped\", \"Delivered\", \"Cancelled\"]) for _ in range(num_orders)],\n", " \"payment_method\": [random.choice([\"Credit Card\", \"PayPal\", \"Bank Transfer\"]) for _ in range(num_orders)],\n", " \"total_amount\": [round(random.uniform(10.0, 100.0), 2) for _ in range(num_orders)],\n", " \"shipping_address\": [fake.street_address() for _ in range(num_orders)],\n", " \"shipping_city\": [fake.city() for _ in range(num_orders)],\n", " \"shipping_state\": [fake.state() for _ in range(num_orders)],\n", " \"shipping_zip\": [fake.zipcode() for _ in range(num_orders)],\n", " \"shipping_country\": [fake.country() for _ in range(num_orders)]\n", "}\n", "\n", "orders_df = pd.DataFrame(data)" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
order_idcustomer_idproduct_idorder_dateorder_statuspayment_methodtotal_amountshipping_addressshipping_cityshipping_stateshipping_zipshipping_country
01855062024-07-03 08:05:03PendingCredit Card54.40140 Edwards OverpassKingtownKansas05046British Virgin Islands
12882702024-09-21 12:08:46ShippedBank Transfer54.55811 Blair Glen Apt. 318Port AndrewNew Jersey46407Liberia
2363892024-04-28 09:50:13ShippedPayPal38.3435571 Debra StravenueWarrenhavenLouisiana78358Maldives
34538862024-03-03 22:47:52PendingBank Transfer46.6745222 Karen Trace Apt. 530NicolelandNorth Dakota91684United States Minor Outlying Islands
451391412024-02-06 20:16:53ShippedBank Transfer11.0961721 Perez Walks Apt. 244Lake CurtischesterNew York22193Bangladesh
\n", "
" ], "text/plain": [ " order_id customer_id product_id order_date order_status \\\n", "0 1 85 506 2024-07-03 08:05:03 Pending \n", "1 2 88 270 2024-09-21 12:08:46 Shipped \n", "2 3 63 89 2024-04-28 09:50:13 Shipped \n", "3 4 53 886 2024-03-03 22:47:52 Pending \n", "4 5 139 141 2024-02-06 20:16:53 Shipped \n", "\n", " payment_method total_amount shipping_address \\\n", "0 Credit Card 54.40 140 Edwards Overpass \n", "1 Bank Transfer 54.55 811 Blair Glen Apt. 318 \n", "2 PayPal 38.34 35571 Debra Stravenue \n", "3 Bank Transfer 46.67 45222 Karen Trace Apt. 530 \n", "4 Bank Transfer 11.09 61721 Perez Walks Apt. 244 \n", "\n", " shipping_city shipping_state shipping_zip \\\n", "0 Kingtown Kansas 05046 \n", "1 Port Andrew New Jersey 46407 \n", "2 Warrenhaven Louisiana 78358 \n", "3 Nicoleland North Dakota 91684 \n", "4 Lake Curtischester New York 22193 \n", "\n", " shipping_country \n", "0 British Virgin Islands \n", "1 Liberia \n", "2 Maldives \n", "3 United States Minor Outlying Islands \n", "4 Bangladesh " ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "orders_df.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Save the dataframe to SQLite" ] }, { "cell_type": "code", "execution_count": 56, "metadata": {}, "outputs": [], "source": [ "# Create a connection to the SQLite database\n", "conn = sqlite3.connect('ecomm.db')\n", "\n", "# Save the DataFrame to the SQLite database\n", "customers_df.to_sql('customer_details', conn, if_exists='replace', index=False)\n", "items_df.to_sql('items', conn, if_exists='replace', index=False)\n", "orders_df.to_sql('orders', conn, if_exists='replace', index=False)\n", "\n", "# Close the connection\n", "conn.close()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Deleting cusomers table from database" ] }, { "cell_type": "code", "execution_count": 57, "metadata": {}, "outputs": [], "source": [ "# Establish a connection to the database\n", "conn = sqlite3.connect('ecomm.db')\n", "\n", "# Create a cursor object\n", "cur = conn.cursor()\n", "\n", "# Delete the table\n", "cur.execute('DROP TABLE customers')\n", "\n", "# Commit the changes\n", "conn.commit()\n", "\n", "# Close the connection\n", "conn.close()" ] } ], "metadata": { "kernelspec": { "display_name": "langenv", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.17" } }, "nbformat": 4, "nbformat_minor": 2 }