3morrrrr committed on
Commit
9374a4b
·
verified ·
1 Parent(s): 4e03c68

Update helper.py

Browse files
Files changed (1) hide show
  1. helper.py +69 -227
helper.py CHANGED
@@ -1,15 +1,10 @@
1
- import pandas as pd
2
  import os
3
- from sklearn.preprocessing import MinMaxScaler
4
  import random
5
  import re
6
-
7
- import pandas as pd
8
- from sklearn.preprocessing import MinMaxScaler
9
-
10
- import pandas as pd
11
  from sklearn.preprocessing import MinMaxScaler
12
 
 
13
  def assign_main_accounts(creators_file, chatter_files):
14
  creators = pd.read_excel(creators_file)
15
  creators.columns = creators.columns.str.strip()
@@ -17,6 +12,7 @@ def assign_main_accounts(creators_file, chatter_files):
17
  # Debugging: Check initial columns
18
  print("DEBUG: Initial Columns in Creator File:", creators.columns)
19
 
 
20
  column_mapping = {
21
  "Creator": "Creator",
22
  "Total earnings": "Total earnings",
@@ -39,6 +35,7 @@ def assign_main_accounts(creators_file, chatter_files):
39
  creators["Subscription"] = creators["Subscription"].replace("[\$,]", "", regex=True).astype(float)
40
  creators["ActiveFans"] = pd.to_numeric(creators["ActiveFans"], errors="coerce").fillna(0)
41
 
 
42
  scaler = MinMaxScaler()
43
  creators[["Earnings_Normalized", "Subscriptions_Normalized"]] = scaler.fit_transform(
44
  creators[["Total earnings", "Subscription"]]
@@ -53,275 +50,120 @@ def assign_main_accounts(creators_file, chatter_files):
53
  processed_creator_file = creators[["Creator", "ActiveFans"]]
54
 
55
  updated_chatter_files = []
56
- assignments = []
57
-
58
- for idx, chatter_file in enumerate(chatter_files):
59
  chatters = pd.read_excel(chatter_file)
 
 
 
 
 
60
  chatters["Main Account"] = creators.iloc[:len(chatters)]["Creator"].values
61
  updated_chatter_files.append(chatters)
62
- assignments.append(chatters)
63
-
64
- return updated_chatter_files, processed_creator_file, pd.concat(assignments)
65
-
66
-
67
-
68
-
69
-
70
-
71
-
72
 
 
73
 
74
 
75
  def save_processed_files(assignments, output_dir):
76
  """
77
- Save processed files for main assignments, ensuring chatter names and main accounts are preserved correctly.
78
  """
79
- for shift, data in assignments.items():
80
- if shift == "creator_names":
81
- continue
82
-
83
- # Create a DataFrame from the assignment data
84
- df = pd.DataFrame(data)
85
-
86
- # Handle multiple 'Main Account' columns and ensure there's only one
87
- if "Main Account_x" in df.columns and "Main Account_y" in df.columns:
88
- df["Main Account"] = df["Main Account_x"].fillna(df["Main Account_y"])
89
- df.drop(columns=["Main Account_x", "Main Account_y"], inplace=True)
90
- elif "Main Account_x" in df.columns:
91
- df.rename(columns={"Main Account_x": "Main Account"}, inplace=True)
92
- elif "Main Account_y" in df.columns:
93
- df.rename(columns={"Main Account_y": "Main Account"}, inplace=True)
94
-
95
- # Ensure all other columns (like 'Final Rating', 'Desired Off Day', etc.) are retained
96
- required_columns = ["Name", "Main Account", "Final Rating", "Available Work Days", "Desired Off Day"]
97
- for col in required_columns:
98
- if col not in df.columns:
99
- df[col] = None # Add missing columns as empty
100
-
101
- # Ensure proper ordering of columns for consistency
102
- column_order = ["Name", "Main Account", "Final Rating", "Available Work Days", "Desired Off Day"]
103
- df = df[[col for col in column_order if col in df.columns] + [col for col in df.columns if col not in column_order]]
104
-
105
- # Save the cleaned DataFrame
106
- output_path = os.path.join(output_dir, f"Updated_{shift}_file.xlsx")
107
- df.to_excel(output_path, index=False)
108
-
109
- # Debugging: Verify the saved file contains the right columns
110
- print(f"DEBUG: Saved File for {shift}: {output_path}")
111
- print(df.head())
112
 
113
 
114
-
115
-
116
- def generate_schedule(chatter_files, account_data):
117
  """
118
- Generate schedules for different shifts (Overnight, Day, Prime) using chatter and account data.
119
  """
120
- schedules = {}
121
-
122
- # Validate required columns in the account data
123
- if not {"Creator", "ActiveFans"}.issubset(account_data.columns):
124
- raise KeyError("The account data must contain 'Creator' and 'ActiveFans' columns.")
125
-
126
- shift_names = ["Overnight", "Day", "Prime"]
127
-
128
- for idx, chatter_df in enumerate(chatter_files):
129
- shift_name = shift_names[idx]
130
-
131
- # Debugging: Print initial chatter data
132
- print(f"DEBUG: Initial {shift_name} Chatter Data:")
133
- print(chatter_df.head())
134
-
135
- # Clean chatter data
136
- chatter_df = clean_chatter_data(chatter_df)
137
-
138
- # Debugging: Print cleaned chatter data
139
- print(f"DEBUG: Cleaned {shift_name} Chatter Data:")
140
- print(chatter_df.head())
141
-
142
- # Create a blank schedule template
143
- schedule = create_schedule_template(account_data)
144
-
145
- # Debugging: Print initial schedule template
146
- print(f"DEBUG: Initial Schedule Template for {shift_name}:")
147
- print(schedule.head())
148
-
149
- # Assign main accounts to the schedule
150
- schedule = assign_main_accounts_to_schedule(schedule, chatter_df)
151
-
152
- # Debugging: Print schedule after assigning main accounts
153
- print(f"DEBUG: Schedule After Assigning Main Accounts for {shift_name}:")
154
- print(schedule.head())
155
-
156
- # Assign days off based on chatter preferences
157
- schedule = assign_off_days(schedule, chatter_df)
158
-
159
- # Debugging: Print schedule after assigning off days
160
- print(f"DEBUG: Schedule After Assigning Off Days for {shift_name}:")
161
- print(schedule.head())
162
-
163
- # Randomly fill the remaining slots while respecting constraints
164
- schedule = randomly_fill_slots(schedule, chatter_df)
165
-
166
- # Debugging: Print final schedule for the shift
167
- print(f"DEBUG: Final Schedule for {shift_name}:")
168
- print(schedule.head())
169
-
170
- # Save the schedule
171
- schedules[shift_name] = schedule.to_dict(orient="records")
172
-
173
- return schedules
174
-
175
-
176
-
177
-
178
-
179
 
 
 
 
 
180
 
 
181
 
182
 
 
183
  def create_schedule_template(account_data):
184
  """
185
  Create a blank schedule template with required columns.
186
  """
187
- if "Account" not in account_data.columns or "ActiveFans" not in account_data.columns:
188
- raise KeyError("Account data must contain 'Account' and 'ActiveFans' columns.")
189
 
190
- schedule_template = account_data[["Account", "ActiveFans"]].copy()
191
  for day in ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]:
192
- schedule_template[day] = None # Initialize all days as None
193
 
194
  return schedule_template
195
 
196
 
197
-
198
  def assign_main_accounts_to_schedule(schedule, chatter_data):
199
  """
200
  Assign main accounts to the schedule based on chatter data.
201
  """
202
- days_of_week = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]
203
-
204
- # Dynamically detect the correct column for the main account
205
- main_account_col = next(
206
- (col for col in ["Main Account", "Main_Account_x", "Main_Account_y"] if col in chatter_data.columns), None
207
- )
208
-
209
- if not main_account_col:
210
- raise KeyError("Main Account column not found in chatter data.")
211
-
212
- # Iterate over each chatter and assign their main account to the schedule
213
  for _, chatter in chatter_data.iterrows():
214
- chatter_name = chatter["Name"]
215
- main_account = chatter[main_account_col]
216
-
217
- if pd.notnull(main_account):
218
- # Locate the row in the schedule that matches the main account
219
- matching_row = schedule[schedule["Account"].str.lower() == main_account.lower()]
220
-
221
- if not matching_row.empty:
222
- row_index = matching_row.index[0]
223
-
224
- # Assign the chatter's name to all days where the slot is empty
225
- for day in days_of_week:
226
- if pd.isnull(schedule.at[row_index, day]):
227
- schedule.at[row_index, day] = chatter_name
228
-
229
- # Debugging: Output updated schedule for verification
230
- print("DEBUG: Updated Schedule after assigning main accounts:")
231
- print(schedule)
232
 
233
  return schedule
234
 
235
 
236
-
237
-
238
-
239
- def clean_chatter_data(chatter_data):
240
- """
241
- Clean and prepare chatter data for scheduling.
242
- """
243
- # Merge any duplicate 'Main Account' columns
244
- if "Main Account_x" in chatter_data.columns and "Main Account_y" in chatter_data.columns:
245
- chatter_data["Main Account"] = chatter_data["Main Account_x"].fillna(chatter_data["Main Account_y"])
246
- chatter_data.drop(columns=["Main Account_x", "Main Account_y"], inplace=True)
247
- elif "Main Account_x" in chatter_data.columns:
248
- chatter_data.rename(columns={"Main Account_x": "Main Account"}, inplace=True)
249
- elif "Main Account_y" in chatter_data.columns:
250
- chatter_data.rename(columns={"Main Account_y": "Main Account"}, inplace=True)
251
-
252
- # Validate required columns
253
- required_columns = ["Name", "Main Account", "Final Rating", "Available Work Days"]
254
- for col in required_columns:
255
- if col not in chatter_data.columns:
256
- raise KeyError(f"Missing required column in chatter data: {col}")
257
-
258
- # Clean and format other data fields if needed
259
- chatter_data["WorkDays"] = pd.to_numeric(chatter_data.get("Available Work Days", 6), errors="coerce").fillna(6).astype(int)
260
- chatter_data["Desired Off Day"] = chatter_data["Desired Off Day"].fillna("").apply(
261
- lambda x: [day.strip().capitalize() for day in re.split(r"[ ,]+", x) if day.strip()]
262
- )
263
-
264
- return chatter_data
265
-
266
-
267
  def assign_off_days(schedule, chatter_data):
268
  """
269
  Assign days off for each chatter based on their 'Desired Off Day' field.
270
  """
271
- if "Desired Off Day" not in chatter_data.columns:
272
- chatter_data["Desired Off Day"] = ""
273
-
274
- days_of_week = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]
275
-
276
  for _, chatter in chatter_data.iterrows():
277
- chatter_name = chatter["Name"]
278
- desired_off_days = chatter["Desired Off Day"]
279
-
280
- # Ensure desired_off_days is parsed into a list
281
- if isinstance(desired_off_days, str):
282
- desired_off_days = [
283
- day.strip().capitalize()
284
- for day in desired_off_days.split(",")
285
- if day.strip().capitalize() in days_of_week
286
- ]
287
-
288
- # Assign None to the schedule for each desired off day
289
- for day in desired_off_days:
290
- if day in days_of_week:
291
- schedule.loc[schedule[day] == chatter_name, day] = None
292
-
293
- # Debugging: Verify schedule after assigning off days
294
- print("DEBUG: Schedule After Assigning Off Days:")
295
- print(schedule.head())
296
-
297
  return schedule
298
 
 
 
299
  def randomly_fill_slots(schedule, chatter_data, max_accounts_per_day=3, max_fans_per_day=1000):
300
  """
301
  Randomly fill remaining slots in the schedule while respecting constraints.
302
  """
303
  days_of_week = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]
304
- daily_accounts = {chatter: {day: 0 for day in days_of_week} for chatter in chatter_data["Name"]}
305
- daily_fans = {chatter: {day: 0 for day in days_of_week} for chatter in chatter_data["Name"]}
306
  chatters_list = chatter_data["Name"].tolist()
307
 
308
  for day in days_of_week:
309
- for i, row in schedule.iterrows():
310
- if pd.isnull(schedule.at[i, day]): # If the slot is empty
311
- random.shuffle(chatters_list) # Shuffle chatters to randomize assignments
312
  for chatter in chatters_list:
313
- active_fans = row["ActiveFans"]
314
- if (
315
- daily_accounts[chatter][day] < max_accounts_per_day and
316
- daily_fans[chatter][day] + active_fans <= max_fans_per_day
317
- ):
318
- schedule.at[i, day] = chatter
319
- daily_accounts[chatter][day] += 1
320
- daily_fans[chatter][day] += active_fans
321
- break
322
-
323
- # Debugging: Verify schedule after filling slots
324
- print("DEBUG: Schedule After Randomly Filling Slots:")
325
- print(schedule.head())
326
-
327
- return schedule
 
 
 
 
 
 
 
 
1
  import os
2
+ import pandas as pd
3
  import random
4
  import re
 
 
 
 
 
5
  from sklearn.preprocessing import MinMaxScaler
6
 
7
+ # Function to assign main accounts
8
  def assign_main_accounts(creators_file, chatter_files):
9
  creators = pd.read_excel(creators_file)
10
  creators.columns = creators.columns.str.strip()
 
12
  # Debugging: Check initial columns
13
  print("DEBUG: Initial Columns in Creator File:", creators.columns)
14
 
15
+ # Standardize column names
16
  column_mapping = {
17
  "Creator": "Creator",
18
  "Total earnings": "Total earnings",
 
35
  creators["Subscription"] = creators["Subscription"].replace("[\$,]", "", regex=True).astype(float)
36
  creators["ActiveFans"] = pd.to_numeric(creators["ActiveFans"], errors="coerce").fillna(0)
37
 
38
+ # Normalize data
39
  scaler = MinMaxScaler()
40
  creators[["Earnings_Normalized", "Subscriptions_Normalized"]] = scaler.fit_transform(
41
  creators[["Total earnings", "Subscription"]]
 
50
  processed_creator_file = creators[["Creator", "ActiveFans"]]
51
 
52
  updated_chatter_files = []
53
+ for chatter_file in chatter_files:
 
 
54
  chatters = pd.read_excel(chatter_file)
55
+ chatters.columns = chatters.columns.str.strip()
56
+ if len(chatters) > len(creators):
57
+ raise ValueError("Not enough creators to assign to all chatters.")
58
+
59
+ # Assign creators to chatters
60
  chatters["Main Account"] = creators.iloc[:len(chatters)]["Creator"].values
61
  updated_chatter_files.append(chatters)
 
 
 
 
 
 
 
 
 
 
62
 
63
+ return updated_chatter_files, processed_creator_file
64
 
65
 
66
def save_processed_files(assignments, output_dir):
    """
    Save processed chatter files to the output directory.

    Each entry of ``assignments`` is written to
    ``<output_dir>/Updated_<shift lowercased>_file.xlsx`` by calling the
    value's ``to_excel(path, index=False)``; a confirmation line is printed
    after each write.

    Args:
        assignments: Mapping of shift name (str) to an object exposing
            ``to_excel`` (presumably a pandas DataFrame -- confirm against
            the caller).
        output_dir: Directory to write into. It is created if missing.
    """
    # Create the target directory up front so a write cannot fail merely
    # because the folder does not exist yet. An empty string means "current
    # directory" for os.path.join, and os.makedirs("") would raise, so skip it.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    for shift, data in assignments.items():
        output_file = os.path.join(output_dir, f"Updated_{shift.lower()}_file.xlsx")
        data.to_excel(output_file, index=False)
        print(f"Saved {shift} file to {output_file}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
 
75
 
76
+ # Function to clean chatter data
77
def _parse_off_days(value):
    """Turn one raw 'Desired Off Day' cell into a list of capitalized day names."""
    if isinstance(value, (list, tuple, set)):
        # Already parsed (e.g. the frame was cleaned before): just normalize.
        tokens = value
    elif pd.isna(value):
        return []
    else:
        tokens = re.split(r"[ ,]+", str(value))
    return [str(day).strip().capitalize() for day in tokens if str(day).strip()]


def clean_chatter_data(chatter_data):
    """
    Clean and prepare chatter data for scheduling.

    The frame is modified in place and also returned:

    * ``WorkDays`` is added: ``Available Work Days`` coerced to int, with
      non-numeric or missing values replaced by 6.
    * ``Desired Off Day`` is replaced by a list of capitalized tokens split on
      spaces/commas. A missing column or missing cell yields an empty list,
      and cells that already hold a list are normalized rather than re-split,
      so the function can safely be called more than once on the same frame.

    Args:
        chatter_data: DataFrame with at least the columns ``Name``,
            ``Main Account``, ``Final Rating`` and ``Available Work Days``.

    Returns:
        The same DataFrame object, cleaned.

    Raises:
        KeyError: If one of the required columns is missing.
    """
    required_columns = ["Name", "Main Account", "Final Rating", "Available Work Days"]
    for col in required_columns:
        if col not in chatter_data.columns:
            raise KeyError(f"Missing required column in chatter data: {col}")

    chatter_data["WorkDays"] = (
        pd.to_numeric(chatter_data["Available Work Days"], errors="coerce")
        .fillna(6)
        .astype(int)
    )

    if "Desired Off Day" in chatter_data.columns:
        off_days = chatter_data["Desired Off Day"].apply(_parse_off_days)
    else:
        # The column is optional: treat its absence as "no preference".
        off_days = pd.Series(
            [[] for _ in range(len(chatter_data))],
            index=chatter_data.index,
            dtype=object,
        )
    chatter_data["Desired Off Day"] = off_days

    return chatter_data
92
 
93
 
94
+ # Function to create a blank schedule template
95
def create_schedule_template(account_data):
    """
    Build an empty weekly schedule from the account data.

    The result is a copy of the ``Creator`` and ``ActiveFans`` columns with
    one extra column per weekday (Monday through Sunday), each set to None.
    The input frame is not modified.

    Raises:
        KeyError: If ``Creator`` or ``ActiveFans`` is absent from
            ``account_data``.
    """
    available = account_data.columns
    if not ("Creator" in available and "ActiveFans" in available):
        raise KeyError("Account data must contain 'Creator' and 'ActiveFans' columns.")

    template = account_data[["Creator", "ActiveFans"]].copy()

    weekdays = ("Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday")
    for weekday in weekdays:
        template[weekday] = None

    return template
107
 
108
 
109
+ # Function to assign main accounts to the schedule
110
def assign_main_accounts_to_schedule(schedule, chatter_data):
    """
    Assign main accounts to the schedule based on chatter data.

    For every chatter whose ``Main Account`` matches a ``Creator`` in the
    schedule, the chatter's ``Name`` is written into every weekday column of
    the first matching schedule row (existing values are overwritten). The
    schedule is modified in place and returned.

    Args:
        schedule: DataFrame with a ``Creator`` column and weekday columns.
        chatter_data: DataFrame with ``Name`` and ``Main Account`` columns.

    Returns:
        The same ``schedule`` object.
    """
    weekdays = ("Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday")
    # Address day columns by name rather than by position so that any extra
    # non-day column in the schedule is never overwritten.
    day_columns = [day for day in weekdays if day in schedule.columns]

    # Map each creator to its first row label once, instead of rebuilding a
    # boolean mask over the whole schedule for every chatter.
    first_row_for = {}
    for row_label, creator in schedule["Creator"].items():
        first_row_for.setdefault(creator, row_label)

    for _, chatter in chatter_data.iterrows():
        main_account = chatter["Main Account"]
        if pd.isna(main_account) or main_account not in first_row_for:
            continue
        row_label = first_row_for[main_account]
        for day in day_columns:
            schedule.at[row_label, day] = chatter["Name"]

    return schedule
122
 
123
 
124
+ # Function to assign off days
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
125
def assign_off_days(schedule, chatter_data):
    """
    Assign days off for each chatter based on their 'Desired Off Day' field.

    For each desired off day that is a weekday column of the schedule, every
    slot on that day currently holding the chatter's ``Name`` is reset to
    None. The schedule is modified in place and returned.

    ``Desired Off Day`` may hold a list/tuple/set of day names or a raw
    string such as ``"monday, sunday"``; names are stripped and capitalized
    before matching. Missing values, and a missing column, mean "no off
    days".

    Args:
        schedule: DataFrame with weekday columns.
        chatter_data: DataFrame with a ``Name`` column and, optionally,
            ``Desired Off Day``.

    Returns:
        The same ``schedule`` object.
    """
    if "Desired Off Day" not in chatter_data.columns:
        return schedule

    weekdays = ("Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday")
    # Match by name, not by column position, so non-day columns are never touched.
    day_columns = {day for day in weekdays if day in schedule.columns}

    for _, chatter in chatter_data.iterrows():
        raw = chatter["Desired Off Day"]
        if isinstance(raw, str):
            # Un-cleaned input: split here instead of iterating characters.
            tokens = re.split(r"[ ,]+", raw)
        elif isinstance(raw, (list, tuple, set)):
            tokens = raw
        else:
            # NaN / None / anything else: no preference recorded.
            continue

        for token in tokens:
            off_day = str(token).strip().capitalize()
            if off_day in day_columns:
                schedule.loc[schedule[off_day] == chatter["Name"], off_day] = None

    return schedule
134
 
135
+
136
+ # Function to randomly fill schedule slots
137
def randomly_fill_slots(schedule, chatter_data, max_accounts_per_day=3, max_fans_per_day=1000):
    """
    Randomly fill remaining empty slots in the schedule.

    For every weekday and every schedule row whose slot is null, the chatter
    list is shuffled and the first chatter who has not listed that weekday in
    ``Desired Off Day`` is assigned. If every chatter is off that day (or
    there are no chatters) the slot stays empty. The schedule is modified in
    place and returned.

    Args:
        schedule: DataFrame containing all seven weekday columns.
        chatter_data: DataFrame with a ``Name`` column and, optionally,
            ``Desired Off Day`` (list of day names or a raw string).
        max_accounts_per_day: Accepted for backward compatibility; not
            enforced by this implementation.
        max_fans_per_day: Accepted for backward compatibility; not enforced
            by this implementation.

    Returns:
        The same ``schedule`` object.
    """
    days_of_week = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]
    chatters_list = chatter_data["Name"].tolist()

    if "Desired Off Day" in chatter_data.columns:
        raw_off_days = chatter_data["Desired Off Day"].tolist()
    else:
        raw_off_days = [None] * len(chatters_list)

    # Build name -> set of off days once, before any shuffling, so a slot
    # cleared for a chatter's day off is not handed straight back to them.
    off_days = {}
    for name, raw in zip(chatters_list, raw_off_days):
        if isinstance(raw, str):
            tokens = re.split(r"[ ,]+", raw)
        elif isinstance(raw, (list, tuple, set)):
            tokens = raw
        else:
            tokens = ()
        off_days.setdefault(name, set()).update(
            str(token).strip().capitalize() for token in tokens
        )

    for day in days_of_week:
        for idx in schedule.index:
            if not pd.isnull(schedule.at[idx, day]):
                continue
            # Shuffle per empty slot (as before) so results for a given seed
            # are unchanged whenever no off days are involved.
            random.shuffle(chatters_list)
            for chatter in chatters_list:
                if day not in off_days.get(chatter, ()):
                    schedule.at[idx, day] = chatter
                    break

    return schedule
153
+
154
+
155
+ # Main schedule generation function
156
def generate_schedule(chatter_files, account_data):
    """
    Generate one schedule per shift from chatter and account data.

    The chatter frames are matched, in order, to the shifts "Overnight",
    "Day" and "Prime". Each frame is cleaned, a blank template is built from
    ``account_data``, main accounts are assigned, desired off days are
    cleared, and the remaining slots are filled at random.

    Args:
        chatter_files: Iterable of at most three chatter DataFrames, in shift
            order.
        account_data: Account DataFrame passed to
            ``create_schedule_template``.

    Returns:
        dict mapping shift name to that shift's schedule, one entry per
        supplied chatter frame.

    Raises:
        IndexError: If more chatter frames are supplied than there are shift
            names.
    """
    shift_names = ["Overnight", "Day", "Prime"]

    # Validate before doing any work, so the caller gets a clear message
    # instead of a bare "list index out of range" after earlier shifts have
    # already been processed.
    chatter_files = list(chatter_files)
    if len(chatter_files) > len(shift_names):
        raise IndexError(
            f"Expected at most {len(shift_names)} chatter files "
            f"({', '.join(shift_names)}), got {len(chatter_files)}."
        )

    schedules = {}
    for shift_name, chatter_df in zip(shift_names, chatter_files):
        chatter_df = clean_chatter_data(chatter_df)
        schedule = create_schedule_template(account_data)
        schedule = assign_main_accounts_to_schedule(schedule, chatter_df)
        schedule = assign_off_days(schedule, chatter_df)
        schedule = randomly_fill_slots(schedule, chatter_df)
        schedules[shift_name] = schedule

    return schedules