Merry99 committed on
Commit
8d5bdc7
·
1 Parent(s): 2cd42be

windows standard

Browse files
Files changed (1) hide show
  1. app.py +26 -7
app.py CHANGED
@@ -254,21 +254,40 @@ async def upload_batch_dataset(payload: BatchUploadPayload):
254
  existing = DatasetDict()
255
  print("πŸ“‚ κΈ°μ‘΄ repo μ—†μŒ β†’ μƒˆλ‘œ 생성")
256
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
257
  # ν˜„μž¬ μ‚¬μš©μžλ§Œ μ—…λ°μ΄νŠΈ
258
  for user_id, records in user_data_groups.items():
259
  try:
260
- df = pd.DataFrame(records)
261
- new_dataset = Dataset.from_pandas(df)
 
 
262
 
263
  if user_id in existing:
264
- # κΈ°μ‘΄ λ°μ΄ν„°ν”„λ ˆμž„κ³Ό 병합
265
  old_df = existing[user_id].to_pandas()
266
- merged = pd.concat([old_df, df], ignore_index=True)
267
- existing[user_id] = Dataset.from_pandas(merged)
268
- print(f"πŸ“Š {user_id}: κΈ°μ‘΄ 데이터와 병합 ({len(old_df)} + {len(df)} = {len(merged)}개 λ ˆμ½”λ“œ)")
 
 
 
269
  else:
270
  existing[user_id] = new_dataset
271
- print(f"πŸ“Š {user_id}: μ‹ κ·œ 데이터 μΆ”κ°€ ({len(df)}개 λ ˆμ½”λ“œ)")
272
 
273
  results[user_id] = {
274
  "status": "success",
 
254
  existing = DatasetDict()
255
  print("πŸ“‚ κΈ°μ‘΄ repo μ—†μŒ β†’ μƒˆλ‘œ 생성")
256
 
257
def normalize_windows(record):
    """Normalize ``record["windows"]`` to a flat list of strings, in place.

    Accepted input shapes for the ``"windows"`` value:

    * a dict -> the stringified dict values, in insertion order;
    * a list -> each element is handled on its own: a dict element
      contributes its stringified values, any other element contributes
      ``str(element)``;
    * anything else (including a missing key or ``None``) -> ``[]``.

    Each list element is classified individually rather than by looking
    only at the first one, so a list mixing dicts and scalars no longer
    raises ``AttributeError``.

    Args:
        record: Mapping-like record; must support ``.get`` and item
            assignment. It is mutated in place.

    Returns:
        The same ``record`` object, with ``record["windows"]`` replaced by
        a list of ``str``.
    """
    windows = record.get("windows")

    if isinstance(windows, dict):
        # A bare dict is treated like a one-element list of dicts.
        windows = [windows]
    elif not isinstance(windows, list):
        # Missing, None, or an unsupported type: fall back to an empty list.
        record["windows"] = []
        return record

    flattened = []
    for item in windows:
        if isinstance(item, dict):
            flattened.extend(str(value) for value in item.values())
        else:
            flattened.append(str(item))

    record["windows"] = flattened
    return record
270
+
271
  # ν˜„μž¬ μ‚¬μš©μžλ§Œ μ—…λ°μ΄νŠΈ
272
  for user_id, records in user_data_groups.items():
273
  try:
274
+ # μƒˆ 데이터 μ •κ·œν™”
275
+ normalized = [normalize_windows(r) for r in records]
276
+ new_df = pd.DataFrame(normalized)
277
+ new_dataset = Dataset.from_pandas(new_df)
278
 
279
  if user_id in existing:
280
+ # κΈ°μ‘΄ 데이터 μ •κ·œν™” 및 병합
281
  old_df = existing[user_id].to_pandas()
282
+ old_df["windows"] = old_df["windows"].apply(
283
+ lambda w: [str(v) for v in w] if isinstance(w, list) else []
284
+ )
285
+ merged_df = pd.concat([old_df, new_df], ignore_index=True)
286
+ existing[user_id] = Dataset.from_pandas(merged_df)
287
+ print(f"πŸ“Š {user_id}: κΈ°μ‘΄ 데이터와 병합 ({len(old_df)} + {len(new_df)} = {len(merged_df)}개 λ ˆμ½”λ“œ)")
288
  else:
289
  existing[user_id] = new_dataset
290
+ print(f"πŸ“Š {user_id}: μ‹ κ·œ 데이터 μΆ”κ°€ ({len(new_df)}개 λ ˆμ½”λ“œ)")
291
 
292
  results[user_id] = {
293
  "status": "success",