muhalwan committed on
Commit
bd4c80f
·
1 Parent(s): 6a0a429
Files changed (1) hide show
  1. data_processor.py +7 -7
data_processor.py CHANGED
@@ -195,7 +195,7 @@ class DataProcessor:
195
 
196
  # Remove rows with missing critical data
197
  before_dropna = len(courses)
198
- courses = courses.dropna(subset=["kode_mk", "kategori_mk"])
199
  if len(courses) < before_dropna:
200
  logger.info(
201
  f" Removed {before_dropna - len(courses)} rows with missing kode_mk or kategori_mk"
@@ -228,19 +228,19 @@ class DataProcessor:
228
  initial_count = len(students)
229
 
230
  # Remove rows with missing critical data
231
- students = students.dropna(subset=["kode_mk", "thn", "smt", "kode_mhs"])
232
  if len(students) < initial_count:
233
  logger.info(
234
  f" Removed {initial_count - len(students)} rows with missing critical data"
235
  )
236
 
237
  # Ensure correct data types
238
- students["thn"] = pd.to_numeric(students["thn"], errors="coerce")
239
- students["smt"] = pd.to_numeric(students["smt"], errors="coerce")
240
 
241
  # Remove rows with invalid year/semester after conversion
242
  before_invalid = len(students)
243
- students = students.dropna(subset=["thn", "smt"])
244
  if len(students) < before_invalid:
245
  logger.info(
246
  f" Removed {before_invalid - len(students)} rows with invalid year/semester values"
@@ -253,7 +253,7 @@ class DataProcessor:
253
  logger.warning(
254
  f" Found {invalid_sem.sum()} records with invalid semester values"
255
  )
256
- students = students[~invalid_sem]
257
 
258
  # Validate year range
259
  current_year = pd.Timestamp.now().year
@@ -262,7 +262,7 @@ class DataProcessor:
262
  logger.warning(
263
  f" Found {invalid_year.sum()} records with unreasonable year values"
264
  )
265
- students = students[~invalid_year]
266
 
267
  # Remove exact duplicate enrollments (same student, course, semester)
268
  before_dedup = len(students)
 
195
 
196
  # Remove rows with missing critical data
197
  before_dropna = len(courses)
198
+ courses = courses.dropna(subset=["kode_mk", "kategori_mk"]).copy()
199
  if len(courses) < before_dropna:
200
  logger.info(
201
  f" Removed {before_dropna - len(courses)} rows with missing kode_mk or kategori_mk"
 
228
  initial_count = len(students)
229
 
230
  # Remove rows with missing critical data
231
+ students = students.dropna(subset=["kode_mk", "thn", "smt", "kode_mhs"]).copy()
232
  if len(students) < initial_count:
233
  logger.info(
234
  f" Removed {initial_count - len(students)} rows with missing critical data"
235
  )
236
 
237
  # Ensure correct data types
238
+ students.loc[:, "thn"] = pd.to_numeric(students["thn"], errors="coerce")
239
+ students.loc[:, "smt"] = pd.to_numeric(students["smt"], errors="coerce")
240
 
241
  # Remove rows with invalid year/semester after conversion
242
  before_invalid = len(students)
243
+ students = students.dropna(subset=["thn", "smt"]).copy()
244
  if len(students) < before_invalid:
245
  logger.info(
246
  f" Removed {before_invalid - len(students)} rows with invalid year/semester values"
 
253
  logger.warning(
254
  f" Found {invalid_sem.sum()} records with invalid semester values"
255
  )
256
+ students = students[~invalid_sem].copy()
257
 
258
  # Validate year range
259
  current_year = pd.Timestamp.now().year
 
262
  logger.warning(
263
  f" Found {invalid_year.sum()} records with unreasonable year values"
264
  )
265
+ students = students[~invalid_year].copy()
266
 
267
  # Remove exact duplicate enrollments (same student, course, semester)
268
  before_dedup = len(students)