Merry99 committed on
Commit
2fafcc5
·
1 Parent(s): ead1bf0

update_batch_dataset: rm window list

Browse files
Files changed (1) hide show
  1. app.py +4 -59
app.py CHANGED
@@ -64,7 +64,6 @@ class BatchDataItem(BaseModel):
64
  fatigue: float
65
  mode: str
66
  window_count: int
67
- windows: List[dict] = Field(default_factory=list)
68
  measurement_count: int
69
 
70
  class BatchUploadPayload(BaseModel):
@@ -235,7 +234,6 @@ async def upload_batch_dataset(payload: BatchUploadPayload):
235
  "fatigue": item.fatigue,
236
  "mode": item.mode,
237
  "window_count": item.window_count,
238
- "windows": item.windows,
239
  "measurement_count": item.measurement_count,
240
  "batch_date": payload.batch_date,
241
  "batch_size": payload.batch_size,
@@ -255,10 +253,6 @@ async def upload_batch_dataset(payload: BatchUploadPayload):
255
  new_existing = DatasetDict()
256
  for user_id in existing.keys():
257
  df = existing[user_id].to_pandas()
258
- # windows 필드를 문자열 리스트로 강제 변환
259
- df["windows"] = df["windows"].apply(
260
- lambda w: [str(v) for v in w] if isinstance(w, list) and len(w) > 0 else []
261
- )
262
  # 모든 데이터를 새로 생성하여 스키마 통일
263
  new_existing[user_id] = df_to_dataset(df)
264
  print(f"🔧 {user_id}: 기존 데이터 재생성 완료")
@@ -268,35 +262,16 @@ async def upload_batch_dataset(payload: BatchUploadPayload):
268
  existing = DatasetDict()
269
  print("📂 기존 repo 없음 → 새로 생성")
270
 
271
- def normalize_windows(record):
272
- """windows 데이터를 문자열 리스트로 정규화"""
273
- w = record.get("windows")
274
- if isinstance(w, list):
275
- if len(w) > 0 and isinstance(w[0], dict):
276
- record["windows"] = [str(v) for d in w for v in d.values()]
277
- else:
278
- record["windows"] = [str(x) for x in w]
279
- elif isinstance(w, dict):
280
- record["windows"] = [str(v) for v in w.values()]
281
- else:
282
- record["windows"] = []
283
- return record
284
-
285
  # 현재 사용자만 업데이트
286
  for user_id, records in user_data_groups.items():
287
  try:
288
- # 새 데이터 정규화
289
- normalized = [normalize_windows(r) for r in records]
290
- new_df = pd.DataFrame(normalized)
291
  new_dataset = df_to_dataset(new_df)
292
 
293
  if user_id in existing:
294
- # 기존 데이터 정규화 및 병합
295
  old_df = existing[user_id].to_pandas()
296
- # 기존 windows 데이터를 문자열 리스트로 정규화
297
- old_df["windows"] = old_df["windows"].apply(
298
- lambda w: [str(v) for v in w] if isinstance(w, list) and len(w) > 0 else []
299
- )
300
  merged_df = pd.concat([old_df, new_df], ignore_index=True)
301
  existing[user_id] = df_to_dataset(merged_df)
302
  print(f"📊 {user_id}: 기존 데이터와 병합 ({len(old_df)} + {len(new_df)} = {len(merged_df)}개 레코드)")
@@ -340,37 +315,7 @@ async def upload_batch_dataset(payload: BatchUploadPayload):
340
  print(f"❌ 배치 푸시 실패: {e}")
341
  raise HTTPException(status_code=500, detail=f"배치 푸시 실패: {str(e)}")
342
 
343
- def normalize_windows(record):
344
- w = record.get("windows", [])
345
- result = []
346
-
347
- if isinstance(w, list):
348
- for item in w:
349
- if isinstance(item, dict):
350
- # 딕셔너리의 모든 값들을 문자열로 변환
351
- for v in item.values():
352
- if v is not None and str(v).strip():
353
- result.append(str(v))
354
- elif item is not None and str(item).strip():
355
- result.append(str(item))
356
- elif isinstance(w, dict):
357
- # 딕셔너리의 모든 값들을 문자열로 변환
358
- for v in w.values():
359
- if v is not None and str(v).strip():
360
- result.append(str(v))
361
- else:
362
- # windows๊ฐ€ ์—†๊ฑฐ๋‚˜ ๋‹ค๋ฅธ ํƒ€์ž…์ธ ๊ฒฝ์šฐ ๋นˆ ๋ฆฌ์ŠคํŠธ
363
- result = []
364
-
365
- record["windows"] = result
366
- print(f"🔍 Windows 정규화: {w} → {result}")
367
- return record
368
 
369
  def df_to_dataset(df):
370
- """DataFrame์„ Dataset์œผ๋กœ ๋ณ€ํ™˜ (windows ํ•„๋“œ ์ •๊ทœํ™”)"""
371
- # windows 필드가 리스트인지 확인하고 정규화
372
- if 'windows' in df.columns:
373
- df['windows'] = df['windows'].apply(
374
- lambda x: x if isinstance(x, list) else []
375
- )
376
  return Dataset.from_pandas(df)
 
64
  fatigue: float
65
  mode: str
66
  window_count: int
 
67
  measurement_count: int
68
 
69
  class BatchUploadPayload(BaseModel):
 
234
  "fatigue": item.fatigue,
235
  "mode": item.mode,
236
  "window_count": item.window_count,
 
237
  "measurement_count": item.measurement_count,
238
  "batch_date": payload.batch_date,
239
  "batch_size": payload.batch_size,
 
253
  new_existing = DatasetDict()
254
  for user_id in existing.keys():
255
  df = existing[user_id].to_pandas()
 
 
 
 
256
  # 모든 데이터를 새로 생성하여 스키마 통일
257
  new_existing[user_id] = df_to_dataset(df)
258
  print(f"🔧 {user_id}: 기존 데이터 재생성 완료")
 
262
  existing = DatasetDict()
263
  print("📂 기존 repo 없음 → 새로 생성")
264
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
265
  # 현재 사용자만 업데이트
266
  for user_id, records in user_data_groups.items():
267
  try:
268
+ # 새 데이터 처리
269
+ new_df = pd.DataFrame(records)
 
270
  new_dataset = df_to_dataset(new_df)
271
 
272
  if user_id in existing:
273
+ # 기존 데이터와 병합
274
  old_df = existing[user_id].to_pandas()
 
 
 
 
275
  merged_df = pd.concat([old_df, new_df], ignore_index=True)
276
  existing[user_id] = df_to_dataset(merged_df)
277
  print(f"📊 {user_id}: 기존 데이터와 병합 ({len(old_df)} + {len(new_df)} = {len(merged_df)}개 레코드)")
 
315
  print(f"❌ 배치 푸시 실패: {e}")
316
  raise HTTPException(status_code=500, detail=f"배치 푸시 실패: {str(e)}")
317
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
318
 
319
  def df_to_dataset(df):
320
+ """DataFrame์„ Dataset์œผ๋กœ ๋ณ€ํ™˜"""
 
 
 
 
 
321
  return Dataset.from_pandas(df)