Spaces:
Sleeping
Sleeping
update_batch_dataset: rm window list
Browse files
app.py
CHANGED
|
@@ -64,7 +64,6 @@ class BatchDataItem(BaseModel):
|
|
| 64 |
fatigue: float
|
| 65 |
mode: str
|
| 66 |
window_count: int
|
| 67 |
-
windows: List[dict] = Field(default_factory=list)
|
| 68 |
measurement_count: int
|
| 69 |
|
| 70 |
class BatchUploadPayload(BaseModel):
|
|
@@ -235,7 +234,6 @@ async def upload_batch_dataset(payload: BatchUploadPayload):
|
|
| 235 |
"fatigue": item.fatigue,
|
| 236 |
"mode": item.mode,
|
| 237 |
"window_count": item.window_count,
|
| 238 |
-
"windows": item.windows,
|
| 239 |
"measurement_count": item.measurement_count,
|
| 240 |
"batch_date": payload.batch_date,
|
| 241 |
"batch_size": payload.batch_size,
|
|
@@ -255,10 +253,6 @@ async def upload_batch_dataset(payload: BatchUploadPayload):
|
|
| 255 |
new_existing = DatasetDict()
|
| 256 |
for user_id in existing.keys():
|
| 257 |
df = existing[user_id].to_pandas()
|
| 258 |
-
# Force-convert the windows field to a list of strings
|
| 259 |
-
df["windows"] = df["windows"].apply(
|
| 260 |
-
lambda w: [str(v) for v in w] if isinstance(w, list) and len(w) > 0 else []
|
| 261 |
-
)
|
| 262 |
# Rebuild all data from scratch so the schema is unified
|
| 263 |
new_existing[user_id] = df_to_dataset(df)
|
| 264 |
print(f"๐ง {user_id}: ๊ธฐ์กด ๋ฐ์ดํฐ ์ฌ์์ฑ ์๋ฃ")
|
|
@@ -268,35 +262,16 @@ async def upload_batch_dataset(payload: BatchUploadPayload):
|
|
| 268 |
existing = DatasetDict()
|
| 269 |
print("๐ ๊ธฐ์กด repo ์์ โ ์๋ก ์์ฑ")
|
| 270 |
|
| 271 |
-
def normalize_windows(record):
|
| 272 |
-
"""Normalize the windows data into a list of strings."""
|
| 273 |
-
w = record.get("windows")
|
| 274 |
-
if isinstance(w, list):
|
| 275 |
-
if len(w) > 0 and isinstance(w[0], dict):
|
| 276 |
-
record["windows"] = [str(v) for d in w for v in d.values()]
|
| 277 |
-
else:
|
| 278 |
-
record["windows"] = [str(x) for x in w]
|
| 279 |
-
elif isinstance(w, dict):
|
| 280 |
-
record["windows"] = [str(v) for v in w.values()]
|
| 281 |
-
else:
|
| 282 |
-
record["windows"] = []
|
| 283 |
-
return record
|
| 284 |
-
|
| 285 |
# Update only the current users
|
| 286 |
for user_id, records in user_data_groups.items():
|
| 287 |
try:
|
| 288 |
-
# New data
|
| 289 |
-
|
| 290 |
-
new_df = pd.DataFrame(normalized)
|
| 291 |
new_dataset = df_to_dataset(new_df)
|
| 292 |
|
| 293 |
if user_id in existing:
|
| 294 |
-
# Existing data
|
| 295 |
old_df = existing[user_id].to_pandas()
|
| 296 |
-
# Normalize the existing windows data into a list of strings
|
| 297 |
-
old_df["windows"] = old_df["windows"].apply(
|
| 298 |
-
lambda w: [str(v) for v in w] if isinstance(w, list) and len(w) > 0 else []
|
| 299 |
-
)
|
| 300 |
merged_df = pd.concat([old_df, new_df], ignore_index=True)
|
| 301 |
existing[user_id] = df_to_dataset(merged_df)
|
| 302 |
print(f"๐ {user_id}: ๊ธฐ์กด ๋ฐ์ดํฐ์ ๋ณํฉ ({len(old_df)} + {len(new_df)} = {len(merged_df)}๊ฐ ๋ ์ฝ๋)")
|
|
@@ -340,37 +315,7 @@ async def upload_batch_dataset(payload: BatchUploadPayload):
|
|
| 340 |
print(f"โ ๋ฐฐ์น ํธ์ ์คํจ: {e}")
|
| 341 |
raise HTTPException(status_code=500, detail=f"๋ฐฐ์น ํธ์ ์คํจ: {str(e)}")
|
| 342 |
|
| 343 |
-
def normalize_windows(record):
|
| 344 |
-
w = record.get("windows", [])
|
| 345 |
-
result = []
|
| 346 |
-
|
| 347 |
-
if isinstance(w, list):
|
| 348 |
-
for item in w:
|
| 349 |
-
if isinstance(item, dict):
|
| 350 |
-
# Convert every value of the dictionary to a string
|
| 351 |
-
for v in item.values():
|
| 352 |
-
if v is not None and str(v).strip():
|
| 353 |
-
result.append(str(v))
|
| 354 |
-
elif item is not None and str(item).strip():
|
| 355 |
-
result.append(str(item))
|
| 356 |
-
elif isinstance(w, dict):
|
| 357 |
-
# Convert every value of the dictionary to a string
|
| 358 |
-
for v in w.values():
|
| 359 |
-
if v is not None and str(v).strip():
|
| 360 |
-
result.append(str(v))
|
| 361 |
-
else:
|
| 362 |
-
# Use an empty list when windows is missing or has any other type
|
| 363 |
-
result = []
|
| 364 |
-
|
| 365 |
-
record["windows"] = result
|
| 366 |
-
print(f"๐ Windows ์ ๊ทํ: {w} โ {result}")
|
| 367 |
-
return record
|
| 368 |
|
| 369 |
def df_to_dataset(df):
|
| 370 |
-
"""Convert a DataFrame to a Dataset
|
| 371 |
-
# Check that the windows field is a list and normalize it
|
| 372 |
-
if 'windows' in df.columns:
|
| 373 |
-
df['windows'] = df['windows'].apply(
|
| 374 |
-
lambda x: x if isinstance(x, list) else []
|
| 375 |
-
)
|
| 376 |
return Dataset.from_pandas(df)
|
|
|
|
| 64 |
fatigue: float
|
| 65 |
mode: str
|
| 66 |
window_count: int
|
|
|
|
| 67 |
measurement_count: int
|
| 68 |
|
| 69 |
class BatchUploadPayload(BaseModel):
|
|
|
|
| 234 |
"fatigue": item.fatigue,
|
| 235 |
"mode": item.mode,
|
| 236 |
"window_count": item.window_count,
|
|
|
|
| 237 |
"measurement_count": item.measurement_count,
|
| 238 |
"batch_date": payload.batch_date,
|
| 239 |
"batch_size": payload.batch_size,
|
|
|
|
| 253 |
new_existing = DatasetDict()
|
| 254 |
for user_id in existing.keys():
|
| 255 |
df = existing[user_id].to_pandas()
|
|
|
|
|
|
|
|
|
|
|
|
|
| 256 |
# Rebuild all data from scratch so the schema is unified
|
| 257 |
new_existing[user_id] = df_to_dataset(df)
|
| 258 |
print(f"๐ง {user_id}: ๊ธฐ์กด ๋ฐ์ดํฐ ์ฌ์์ฑ ์๋ฃ")
|
|
|
|
| 262 |
existing = DatasetDict()
|
| 263 |
print("๐ ๊ธฐ์กด repo ์์ โ ์๋ก ์์ฑ")
|
| 264 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 265 |
# Update only the current users
|
| 266 |
for user_id, records in user_data_groups.items():
|
| 267 |
try:
|
| 268 |
+
# Process the new data
|
| 269 |
+
new_df = pd.DataFrame(records)
|
|
|
|
| 270 |
new_dataset = df_to_dataset(new_df)
|
| 271 |
|
| 272 |
if user_id in existing:
|
| 273 |
+
# Merge with the existing data
|
| 274 |
old_df = existing[user_id].to_pandas()
|
|
|
|
|
|
|
|
|
|
|
|
|
| 275 |
merged_df = pd.concat([old_df, new_df], ignore_index=True)
|
| 276 |
existing[user_id] = df_to_dataset(merged_df)
|
| 277 |
print(f"๐ {user_id}: ๊ธฐ์กด ๋ฐ์ดํฐ์ ๋ณํฉ ({len(old_df)} + {len(new_df)} = {len(merged_df)}๊ฐ ๋ ์ฝ๋)")
|
|
|
|
| 315 |
print(f"โ ๋ฐฐ์น ํธ์ ์คํจ: {e}")
|
| 316 |
raise HTTPException(status_code=500, detail=f"๋ฐฐ์น ํธ์ ์คํจ: {str(e)}")
|
| 317 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 318 |
|
| 319 |
def df_to_dataset(df):
|
| 320 |
+
"""Convert a DataFrame to a Dataset."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 321 |
return Dataset.from_pandas(df)
|