Merry99 committed on
Commit
5726b96
·
1 Parent(s): 8d5bdc7

normalized windows

Browse files
Files changed (1) hide show
  1. app.py +34 -1
app.py CHANGED
@@ -274,7 +274,7 @@ async def upload_batch_dataset(payload: BatchUploadPayload):
274
  # 새 데이터 정규화
275
  normalized = [normalize_windows(r) for r in records]
276
  new_df = pd.DataFrame(normalized)
277
- new_dataset = Dataset.from_pandas(new_df)
278
 
279
  if user_id in existing:
280
  # 기존 데이터 정규화 및 병합
@@ -325,3 +325,36 @@ async def upload_batch_dataset(payload: BatchUploadPayload):
325
  print(f"โŒ ๋ฐฐ์น˜ ํ‘ธ์‹œ ์‹คํŒจ: {e}")
326
  raise HTTPException(status_code=500, detail=f"๋ฐฐ์น˜ ํ‘ธ์‹œ ์‹คํŒจ: {str(e)}")
327
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
274
  # 새 데이터 정규화
275
  normalized = [normalize_windows(r) for r in records]
276
  new_df = pd.DataFrame(normalized)
277
+ new_dataset = df_to_dataset(new_df)
278
 
279
  if user_id in existing:
280
  # 기존 데이터 정규화 및 병합
 
325
  print(f"โŒ ๋ฐฐ์น˜ ํ‘ธ์‹œ ์‹คํŒจ: {e}")
326
  raise HTTPException(status_code=500, detail=f"๋ฐฐ์น˜ ํ‘ธ์‹œ ์‹คํŒจ: {str(e)}")
327
 
328
def normalize_windows(record):
    """Flatten ``record["windows"]`` into a flat list of non-empty strings.

    The ``windows`` value may be a list/tuple whose items are scalars or
    dicts, a single dict, or an array-like object exposing ``tolist()``.
    Dict items contribute their values (keys are discarded); every kept
    value is converted with ``str``. ``None`` values, empty strings and the
    literal string ``"None"`` are dropped. Any other shape (missing key,
    ``None``, bare scalar) yields an empty list.

    Args:
        record: Mapping-like object supporting ``.get`` and item assignment.

    Returns:
        The same ``record`` object, mutated in place so that
        ``record["windows"]`` is a ``list[str]``.
    """
    raw = record.get("windows")

    # Array-likes expose tolist(); convert them to plain Python containers so
    # they are flattened instead of silently collapsing to an empty list.
    # NOTE(review): presumably relevant for numpy arrays produced by a
    # pandas round-trip of existing data -- confirm against the caller.
    if not isinstance(raw, (list, tuple, dict)) and hasattr(raw, "tolist"):
        raw = raw.tolist()

    if isinstance(raw, dict):
        # A lone dict is treated like a one-item list containing that dict.
        items = [raw]
    elif isinstance(raw, (list, tuple)):
        items = raw
    else:
        items = []

    flattened = []
    for item in items:
        values = item.values() if isinstance(item, dict) else (item,)
        flattened.extend(str(v) for v in values if v is not None)

    # "None" is filtered as well as "": it is what str(None) produces, so it
    # is treated as a missing value rather than real data.
    record["windows"] = [s for s in flattened if s not in ("", "None")]
    return record
343
+
344
def df_to_dataset(df):
    """Convert a pandas DataFrame into a ``Dataset`` with a fixed column schema.

    Pinning the column types keeps them from being inferred from the data,
    e.g. ``windows`` is always ``list<string>`` even when every row holds an
    empty list.

    Args:
        df: DataFrame expected to hold the columns named in the schema below.

    Returns:
        The ``Dataset`` built from ``df`` with the explicit column types.
    """
    import pyarrow as pa
    # NOTE(review): assumes ``Dataset`` is ``datasets.Dataset`` (Hugging Face);
    # confirm against the file's imports.
    from datasets import Features

    schema = pa.schema([
        ("session_id", pa.string()),
        ("measure_date", pa.string()),
        ("rms", pa.float64()),
        ("freq", pa.float64()),
        ("fatigue", pa.float64()),
        ("mode", pa.string()),
        ("window_count", pa.int64()),
        ("windows", pa.list_(pa.string())),
        ("measurement_count", pa.int64()),
        ("batch_date", pa.string()),
        ("batch_size", pa.int64()),
        ("timestamp", pa.string()),
    ])
    # Dataset.from_pandas accepts column types through ``features``; it has no
    # ``schema`` keyword, so the Arrow schema is converted to Features first.
    return Dataset.from_pandas(df, features=Features.from_arrow_schema(schema))