PD03 committed on
Commit
bc02e50
·
verified ·
1 Parent(s): c922ef8

Create data/synthetic_data.py

Browse files
Files changed (1) hide show
  1. data/synthetic_data.py +89 -0
data/synthetic_data.py ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import random
from datetime import datetime, timedelta

import numpy as np
import pandas as pd
from faker import Faker

# Shared Faker instance that supplies random dates, person names and
# catch-phrase text. It is created unseeded, so generated values differ
# from run to run.
fake = Faker()
8
+
9
class SAPDataGenerator:
    """Build synthetic procurement data sets as pandas DataFrames.

    Three independent tables can be generated: purchase orders, per-supplier
    performance metrics, and monthly spend per category. All values are
    random draws from the module-level ``random`` generator and the shared
    ``fake`` Faker instance, so results are not reproducible unless the
    caller seeds both beforehand.
    """

    # Explicit column schemas. Passing these to ``pd.DataFrame`` guarantees
    # that an empty result (e.g. ``n=0``) still carries the expected columns
    # instead of collapsing to a column-less frame.
    _PURCHASE_ORDER_COLUMNS = [
        'PO_Number', 'Supplier', 'Category', 'Plant', 'Company_Code',
        'PO_Date', 'Delivery_Date', 'Material', 'Quantity', 'Unit_Price',
        'Total_Value', 'Currency', 'Status', 'Buyer', 'Payment_Terms',
        'Delivery_Performance',
    ]
    _SUPPLIER_PERFORMANCE_COLUMNS = [
        'Supplier', 'On_Time_Delivery', 'Quality_Score', 'Cost_Performance',
        'Total_Spend_YTD', 'Active_Contracts', 'Risk_Score',
        'Certification_Status',
    ]
    _SPEND_ANALYSIS_COLUMNS = [
        'Month', 'Category', 'Spend', 'Budget', 'Variance',
    ]

    def __init__(self):
        """Set up the fixed lookup lists the generators sample from."""
        self.suppliers = [
            "Acme Corp", "Global Supplies Inc", "Tech Solutions Ltd",
            "Industrial Partners", "Premium Materials Co", "Swift Logistics",
            "Quality Components", "Reliable Vendors", "Innovative Systems",
            "Professional Services",
        ]

        self.categories = [
            "Raw Materials", "IT Equipment", "Office Supplies",
            "Professional Services", "Maintenance", "Transportation",
            "Marketing", "Facilities", "Security", "Consulting",
        ]

        self.plant_codes = ["1000", "2000", "3000", "4000", "5000"]
        self.company_codes = ["US01", "DE02", "IN03", "UK04", "SG05"]

    def generate_purchase_orders(self, n: int = 1000) -> pd.DataFrame:
        """Return ``n`` random purchase-order rows.

        Each row gets a sequential ``PO_Number`` (``PO`` followed by the
        1-based index zero-padded to 8 digits), a ``PO_Date`` within the last
        two years, and a ``Delivery_Date`` 7 to 90 days after it.
        ``Total_Value`` is ``Unit_Price * Quantity`` rounded to 2 decimals and
        ``Currency`` is always ``'USD'``.

        ``Status`` and ``Delivery_Performance`` are drawn independently of
        the dates, so a row may be marked ``'Delivered'`` with a delivery
        date that is still in the future.

        Args:
            n: Number of rows to generate. Zero or a negative value yields an
                empty frame that still has the full set of columns.

        Returns:
            A DataFrame with one row per purchase order.
        """
        rows = []
        for index in range(n):
            po_date = fake.date_between(start_date='-2y', end_date='today')
            delivery_date = po_date + timedelta(days=random.randint(7, 90))

            unit_price = round(random.uniform(10, 10000), 2)
            quantity = random.randint(1, 1000)
            total_value = round(unit_price * quantity, 2)

            rows.append({
                'PO_Number': f"PO{index + 1:08d}",
                'Supplier': random.choice(self.suppliers),
                'Category': random.choice(self.categories),
                'Plant': random.choice(self.plant_codes),
                'Company_Code': random.choice(self.company_codes),
                'PO_Date': po_date,
                'Delivery_Date': delivery_date,
                'Material': fake.catch_phrase(),
                'Quantity': quantity,
                'Unit_Price': unit_price,
                'Total_Value': total_value,
                'Currency': 'USD',
                'Status': random.choice(
                    ['Open', 'Delivered', 'Partially Delivered', 'Cancelled']
                ),
                'Buyer': fake.name(),
                'Payment_Terms': random.choice(
                    ['Net 30', 'Net 60', '2/10 Net 30', 'Immediate']
                ),
                'Delivery_Performance': round(random.uniform(85, 99), 1),
            })

        return pd.DataFrame(rows, columns=self._PURCHASE_ORDER_COLUMNS)

    def generate_supplier_performance(self) -> pd.DataFrame:
        """Return one row of random performance metrics per supplier.

        Rows follow the order of ``self.suppliers``. Score columns are
        uniform draws rounded to one decimal; ``Total_Spend_YTD`` is rounded
        to two decimals.

        Returns:
            A DataFrame with one row per entry in ``self.suppliers``; empty
            (but with all columns present) when that list is empty.
        """
        rows = [
            {
                'Supplier': supplier,
                'On_Time_Delivery': round(random.uniform(85, 98), 1),
                'Quality_Score': round(random.uniform(80, 99), 1),
                'Cost_Performance': round(random.uniform(90, 99), 1),
                'Total_Spend_YTD': round(random.uniform(100000, 5000000), 2),
                'Active_Contracts': random.randint(5, 50),
                'Risk_Score': random.choice(['Low', 'Medium', 'High']),
                'Certification_Status': random.choice(
                    ['ISO 9001', 'ISO 14001', 'Multiple', 'None']
                ),
            }
            for supplier in self.suppliers
        ]

        return pd.DataFrame(rows, columns=self._SUPPLIER_PERFORMANCE_COLUMNS)

    def generate_spend_analysis(self) -> pd.DataFrame:
        """Return random spend/budget figures for every month and category.

        Rows are ordered month-major (Jan..Dec), then by ``self.categories``.

        NOTE: ``Variance`` is an independent random draw between -10 and 15;
        it is not computed from the ``Spend`` and ``Budget`` values in the
        same row.

        Returns:
            A DataFrame with ``12 * len(self.categories)`` rows; empty (but
            with all columns present) when ``self.categories`` is empty.
        """
        months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                  'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

        rows = [
            {
                'Month': month,
                'Category': category,
                'Spend': round(random.uniform(50000, 500000), 2),
                'Budget': round(random.uniform(60000, 550000), 2),
                'Variance': round(random.uniform(-10, 15), 1),
            }
            for month in months
            for category in self.categories
        ]

        return pd.DataFrame(rows, columns=self._SPEND_ANALYSIS_COLUMNS)