{
"cells": [
{
"cell_type": "code",
"execution_count": 43,
"metadata": {},
"outputs": [],
"source": [
"import faker\n",
"import pandas as pd\n",
"import random\n",
"import sqlite3"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"fake = faker.Faker()"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [],
"source": [
"num_records = 250"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [],
"source": [
"# Generate dataset\n",
"customers_data = {\n",
" \"customer_id\": range(1, num_records + 1),\n",
" \"first_name\": [fake.first_name() for _ in range(num_records)],\n",
" \"last_name\": [fake.last_name() for _ in range(num_records)],\n",
" \"email\": [fake.email() for _ in range(num_records)],\n",
" \"phone_number\": [fake.phone_number() for _ in range(num_records)],\n",
" \"address\": [fake.street_address() for _ in range(num_records)],\n",
" \"city\": [fake.city() for _ in range(num_records)],\n",
" \"state\": [fake.state() for _ in range(num_records)],\n",
" \"zip_code\": [fake.zipcode() for _ in range(num_records)],\n",
" \"country\": [fake.country() for _ in range(num_records)],\n",
" \"date_of_birth\": [fake.date_of_birth().strftime(\"%Y-%m-%d\") for _ in range(num_records)],\n",
" \"gender\": [random.choice([\"Male\", \"Female\", \"Other\"]) for _ in range(num_records)]\n",
"}\n",
"\n",
"customers_df = pd.DataFrame(customers_data)"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" customer_id | \n",
" first_name | \n",
" last_name | \n",
" email | \n",
" phone_number | \n",
" address | \n",
" city | \n",
" state | \n",
" zip_code | \n",
" country | \n",
" date_of_birth | \n",
" gender | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 1 | \n",
" Daniel | \n",
" Day | \n",
" hvalencia@example.net | \n",
" (671)991-3668 | \n",
" 2712 Matthew Course Apt. 519 | \n",
" Reginashire | \n",
" Virginia | \n",
" 43739 | \n",
" Portugal | \n",
" 1955-02-11 | \n",
" Male | \n",
"
\n",
" \n",
" | 1 | \n",
" 2 | \n",
" Lucas | \n",
" Jimenez | \n",
" jennifer95@example.org | \n",
" 694.215.1833 | \n",
" 560 Victoria Shoals Apt. 465 | \n",
" Marshallmouth | \n",
" Oklahoma | \n",
" 90653 | \n",
" Albania | \n",
" 1909-06-06 | \n",
" Female | \n",
"
\n",
" \n",
" | 2 | \n",
" 3 | \n",
" Victoria | \n",
" Willis | \n",
" millersean@example.org | \n",
" 769-267-3445 | \n",
" 58325 Buck Road Suite 830 | \n",
" South Pamelaborough | \n",
" Oregon | \n",
" 73729 | \n",
" Lithuania | \n",
" 1925-09-12 | \n",
" Other | \n",
"
\n",
" \n",
" | 3 | \n",
" 4 | \n",
" Austin | \n",
" Carr | \n",
" arnoldjennifer@example.com | \n",
" 874-821-2653x36986 | \n",
" 01855 Peterson View Apt. 956 | \n",
" Potterton | \n",
" Wyoming | \n",
" 80500 | \n",
" Dominica | \n",
" 1920-06-23 | \n",
" Other | \n",
"
\n",
" \n",
" | 4 | \n",
" 5 | \n",
" Ethan | \n",
" Martin | \n",
" mark46@example.org | \n",
" 875-454-9228 | \n",
" 617 Clayton Tunnel | \n",
" Adamsport | \n",
" Michigan | \n",
" 38936 | \n",
" Yemen | \n",
" 1985-03-13 | \n",
" Female | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" customer_id first_name last_name email \\\n",
"0 1 Daniel Day hvalencia@example.net \n",
"1 2 Lucas Jimenez jennifer95@example.org \n",
"2 3 Victoria Willis millersean@example.org \n",
"3 4 Austin Carr arnoldjennifer@example.com \n",
"4 5 Ethan Martin mark46@example.org \n",
"\n",
" phone_number address city \\\n",
"0 (671)991-3668 2712 Matthew Course Apt. 519 Reginashire \n",
"1 694.215.1833 560 Victoria Shoals Apt. 465 Marshallmouth \n",
"2 769-267-3445 58325 Buck Road Suite 830 South Pamelaborough \n",
"3 874-821-2653x36986 01855 Peterson View Apt. 956 Potterton \n",
"4 875-454-9228 617 Clayton Tunnel Adamsport \n",
"\n",
" state zip_code country date_of_birth gender \n",
"0 Virginia 43739 Portugal 1955-02-11 Male \n",
"1 Oklahoma 90653 Albania 1909-06-06 Female \n",
"2 Oregon 73729 Lithuania 1925-09-12 Other \n",
"3 Wyoming 80500 Dominica 1920-06-23 Other \n",
"4 Michigan 38936 Yemen 1985-03-13 Female "
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"customers_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [],
"source": [
"# Set seed for reproducibility\n",
"random.seed(42)\n",
"\n",
"# Define number of records\n",
"num_records = 1000\n",
"\n",
"# Generate dataset\n",
"items_data = {\n",
" \"id\": range(1, num_records + 1),\n",
" \"product_name\": [fake.catch_phrase() for _ in range(num_records)],\n",
" \"description\": [fake.paragraph(nb_sentences=3) for _ in range(num_records)],\n",
" \"price\": [round(random.uniform(10.0, 100.0), 2) for _ in range(num_records)],\n",
" \"category\": [random.choice([\"Electronics\", \"Fashion\", \"Home Goods\", \"Sports\", \"Toys\"]) for _ in range(num_records)],\n",
" \"sub_category\": [\n",
" random.choice([\n",
" \"Smartphones\", \"Laptops\", \"Tablets\",\n",
" \"Women's Clothing\", \"Men's Clothing\", \"Kids' Clothing\",\n",
" \"Kitchen Appliances\", \"Home Decor\", \"Furniture\",\n",
" \"Fitness Equipment\", \"Outdoor Gear\", \"Toys & Games\"\n",
" ]) for _ in range(num_records)\n",
" ],\n",
" \"brand\": [fake.company() for _ in range(num_records)],\n",
" \"rating\": [round(random.uniform(1.0, 5.0), 1) for _ in range(num_records)],\n",
" \"num_reviews\": [random.randint(1, 100) for _ in range(num_records)],\n",
" \"stock_quantity\": [random.randint(1, 100) for _ in range(num_records)],\n",
" \"seller_name\": [fake.name() for _ in range(num_records)],\n",
" \"shipping_weight\": [round(random.uniform(1.0, 10.0), 2) for _ in range(num_records)],\n",
" \"shipping_dimension\": [\n",
" f\"{random.randint(6, 20)} x {random.randint(4, 12)} x {random.randint(2, 8)}\"\n",
" for _ in range(num_records)\n",
" ]\n",
"}\n",
"\n",
"items_df = pd.DataFrame(items_data)"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" id | \n",
" product_name | \n",
" description | \n",
" price | \n",
" category | \n",
" sub_category | \n",
" brand | \n",
" rating | \n",
" num_reviews | \n",
" stock_quantity | \n",
" seller_name | \n",
" shipping_weight | \n",
" shipping_dimension | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 1 | \n",
" Ergonomic bottom-line framework | \n",
" Kind stay kid song dream. Yourself would scene... | \n",
" 67.55 | \n",
" Electronics | \n",
" Men's Clothing | \n",
" Gonzalez, Jones and Hanson | \n",
" 3.2 | \n",
" 52 | \n",
" 9 | \n",
" Kathryn Hansen | \n",
" 2.31 | \n",
" 18 x 6 x 6 | \n",
"
\n",
" \n",
" | 1 | \n",
" 2 | \n",
" Reduced high-level customer loyalty | \n",
" Nothing free around expert decade. Great view ... | \n",
" 12.25 | \n",
" Home Goods | \n",
" Toys & Games | \n",
" Walker-Love | \n",
" 1.7 | \n",
" 52 | \n",
" 34 | \n",
" Breanna Allison | \n",
" 1.40 | \n",
" 14 x 7 x 2 | \n",
"
\n",
" \n",
" | 2 | \n",
" 3 | \n",
" Phased holistic capacity | \n",
" Fire usually high manage tend available. | \n",
" 34.75 | \n",
" Toys | \n",
" Laptops | \n",
" Nelson-Morrison | \n",
" 2.8 | \n",
" 59 | \n",
" 29 | \n",
" Allen Hernandez | \n",
" 8.36 | \n",
" 12 x 12 x 5 | \n",
"
\n",
" \n",
" | 3 | \n",
" 4 | \n",
" Quality-focused 6thgeneration matrix | \n",
" Capital onto into eat unit church take ground.... | \n",
" 30.09 | \n",
" Home Goods | \n",
" Kids' Clothing | \n",
" Sullivan, Clark and Larson | \n",
" 4.0 | \n",
" 35 | \n",
" 48 | \n",
" Joseph Hayden | \n",
" 2.80 | \n",
" 19 x 7 x 6 | \n",
"
\n",
" \n",
" | 4 | \n",
" 5 | \n",
" Visionary systemic array | \n",
" Woman former wind bill red authority. Police s... | \n",
" 76.28 | \n",
" Electronics | \n",
" Home Decor | \n",
" Evans PLC | \n",
" 4.1 | \n",
" 50 | \n",
" 11 | \n",
" John Mcdowell | \n",
" 4.36 | \n",
" 13 x 11 x 4 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" id product_name \\\n",
"0 1 Ergonomic bottom-line framework \n",
"1 2 Reduced high-level customer loyalty \n",
"2 3 Phased holistic capacity \n",
"3 4 Quality-focused 6thgeneration matrix \n",
"4 5 Visionary systemic array \n",
"\n",
" description price category \\\n",
"0 Kind stay kid song dream. Yourself would scene... 67.55 Electronics \n",
"1 Nothing free around expert decade. Great view ... 12.25 Home Goods \n",
"2 Fire usually high manage tend available. 34.75 Toys \n",
"3 Capital onto into eat unit church take ground.... 30.09 Home Goods \n",
"4 Woman former wind bill red authority. Police s... 76.28 Electronics \n",
"\n",
" sub_category brand rating num_reviews \\\n",
"0 Men's Clothing Gonzalez, Jones and Hanson 3.2 52 \n",
"1 Toys & Games Walker-Love 1.7 52 \n",
"2 Laptops Nelson-Morrison 2.8 59 \n",
"3 Kids' Clothing Sullivan, Clark and Larson 4.0 35 \n",
"4 Home Decor Evans PLC 4.1 50 \n",
"\n",
" stock_quantity seller_name shipping_weight shipping_dimension \n",
"0 9 Kathryn Hansen 2.31 18 x 6 x 6 \n",
"1 34 Breanna Allison 1.40 14 x 7 x 2 \n",
"2 29 Allen Hernandez 8.36 12 x 12 x 5 \n",
"3 48 Joseph Hayden 2.80 19 x 7 x 6 \n",
"4 11 John Mcdowell 4.36 13 x 11 x 4 "
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"items_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [],
"source": [
"# Define number of orders\n",
"num_orders = 5000\n",
"\n",
"# Generate orders dataset\n",
"data = {\n",
" \"order_id\": range(1, num_orders + 1),\n",
" \"customer_id\": [random.choice(customers_df[\"customer_id\"]) for _ in range(num_orders)],\n",
" \"product_id\": [random.choice(items_df[\"id\"]) for _ in range(num_orders)],\n",
" \"order_date\": [fake.date_time_between(start_date=\"-2y\", end_date=\"now\").strftime(\"%Y-%m-%d %H:%M:%S\") for _ in range(num_orders)],\n",
" \"order_status\": [random.choice([\"Pending\", \"Shipped\", \"Delivered\", \"Cancelled\"]) for _ in range(num_orders)],\n",
" \"payment_method\": [random.choice([\"Credit Card\", \"PayPal\", \"Bank Transfer\"]) for _ in range(num_orders)],\n",
" \"total_amount\": [round(random.uniform(10.0, 100.0), 2) for _ in range(num_orders)],\n",
" \"shipping_address\": [fake.street_address() for _ in range(num_orders)],\n",
" \"shipping_city\": [fake.city() for _ in range(num_orders)],\n",
" \"shipping_state\": [fake.state() for _ in range(num_orders)],\n",
" \"shipping_zip\": [fake.zipcode() for _ in range(num_orders)],\n",
" \"shipping_country\": [fake.country() for _ in range(num_orders)]\n",
"}\n",
"\n",
"orders_df = pd.DataFrame(data)"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" order_id | \n",
" customer_id | \n",
" product_id | \n",
" order_date | \n",
" order_status | \n",
" payment_method | \n",
" total_amount | \n",
" shipping_address | \n",
" shipping_city | \n",
" shipping_state | \n",
" shipping_zip | \n",
" shipping_country | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 1 | \n",
" 85 | \n",
" 506 | \n",
" 2024-07-03 08:05:03 | \n",
" Pending | \n",
" Credit Card | \n",
" 54.40 | \n",
" 140 Edwards Overpass | \n",
" Kingtown | \n",
" Kansas | \n",
" 05046 | \n",
" British Virgin Islands | \n",
"
\n",
" \n",
" | 1 | \n",
" 2 | \n",
" 88 | \n",
" 270 | \n",
" 2024-09-21 12:08:46 | \n",
" Shipped | \n",
" Bank Transfer | \n",
" 54.55 | \n",
" 811 Blair Glen Apt. 318 | \n",
" Port Andrew | \n",
" New Jersey | \n",
" 46407 | \n",
" Liberia | \n",
"
\n",
" \n",
" | 2 | \n",
" 3 | \n",
" 63 | \n",
" 89 | \n",
" 2024-04-28 09:50:13 | \n",
" Shipped | \n",
" PayPal | \n",
" 38.34 | \n",
" 35571 Debra Stravenue | \n",
" Warrenhaven | \n",
" Louisiana | \n",
" 78358 | \n",
" Maldives | \n",
"
\n",
" \n",
" | 3 | \n",
" 4 | \n",
" 53 | \n",
" 886 | \n",
" 2024-03-03 22:47:52 | \n",
" Pending | \n",
" Bank Transfer | \n",
" 46.67 | \n",
" 45222 Karen Trace Apt. 530 | \n",
" Nicoleland | \n",
" North Dakota | \n",
" 91684 | \n",
" United States Minor Outlying Islands | \n",
"
\n",
" \n",
" | 4 | \n",
" 5 | \n",
" 139 | \n",
" 141 | \n",
" 2024-02-06 20:16:53 | \n",
" Shipped | \n",
" Bank Transfer | \n",
" 11.09 | \n",
" 61721 Perez Walks Apt. 244 | \n",
" Lake Curtischester | \n",
" New York | \n",
" 22193 | \n",
" Bangladesh | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" order_id customer_id product_id order_date order_status \\\n",
"0 1 85 506 2024-07-03 08:05:03 Pending \n",
"1 2 88 270 2024-09-21 12:08:46 Shipped \n",
"2 3 63 89 2024-04-28 09:50:13 Shipped \n",
"3 4 53 886 2024-03-03 22:47:52 Pending \n",
"4 5 139 141 2024-02-06 20:16:53 Shipped \n",
"\n",
" payment_method total_amount shipping_address \\\n",
"0 Credit Card 54.40 140 Edwards Overpass \n",
"1 Bank Transfer 54.55 811 Blair Glen Apt. 318 \n",
"2 PayPal 38.34 35571 Debra Stravenue \n",
"3 Bank Transfer 46.67 45222 Karen Trace Apt. 530 \n",
"4 Bank Transfer 11.09 61721 Perez Walks Apt. 244 \n",
"\n",
" shipping_city shipping_state shipping_zip \\\n",
"0 Kingtown Kansas 05046 \n",
"1 Port Andrew New Jersey 46407 \n",
"2 Warrenhaven Louisiana 78358 \n",
"3 Nicoleland North Dakota 91684 \n",
"4 Lake Curtischester New York 22193 \n",
"\n",
" shipping_country \n",
"0 British Virgin Islands \n",
"1 Liberia \n",
"2 Maldives \n",
"3 United States Minor Outlying Islands \n",
"4 Bangladesh "
]
},
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"orders_df.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Save the dataframe to SQLite"
]
},
{
"cell_type": "code",
"execution_count": 56,
"metadata": {},
"outputs": [],
"source": [
"# Create a connection to the SQLite database\n",
"conn = sqlite3.connect('ecomm.db')\n",
"\n",
"# Save the DataFrame to the SQLite database\n",
"customers_df.to_sql('customer_details', conn, if_exists='replace', index=False)\n",
"items_df.to_sql('items', conn, if_exists='replace', index=False)\n",
"orders_df.to_sql('orders', conn, if_exists='replace', index=False)\n",
"\n",
"# Close the connection\n",
"conn.close()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Deleting cusomers table from database"
]
},
{
"cell_type": "code",
"execution_count": 57,
"metadata": {},
"outputs": [],
"source": [
"# Establish a connection to the database\n",
"conn = sqlite3.connect('ecomm.db')\n",
"\n",
"# Create a cursor object\n",
"cur = conn.cursor()\n",
"\n",
"# Delete the table\n",
"cur.execute('DROP TABLE customers')\n",
"\n",
"# Commit the changes\n",
"conn.commit()\n",
"\n",
"# Close the connection\n",
"conn.close()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "langenv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.17"
}
},
"nbformat": 4,
"nbformat_minor": 2
}