Spaces:
Sleeping
Sleeping
windows standard
Browse files
app.py
CHANGED
|
@@ -254,21 +254,40 @@ async def upload_batch_dataset(payload: BatchUploadPayload):
|
|
| 254 |
existing = DatasetDict()
|
| 255 |
print("π 기존 repo 없음 → 새로 생성")
|
| 256 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 257 |
# 현재 사용자만 업데이트
|
| 258 |
for user_id, records in user_data_groups.items():
|
| 259 |
try:
|
| 260 |
-
|
| 261 |
-
|
|
|
|
|
|
|
| 262 |
|
| 263 |
if user_id in existing:
|
| 264 |
-
# 기존 데이터
|
| 265 |
old_df = existing[user_id].to_pandas()
|
| 266 |
-
|
| 267 |
-
|
| 268 |
-
|
|
|
|
|
|
|
|
|
|
| 269 |
else:
|
| 270 |
existing[user_id] = new_dataset
|
| 271 |
-
print(f"π {user_id}: 신규 데이터 추가 ({len(
|
| 272 |
|
| 273 |
results[user_id] = {
|
| 274 |
"status": "success",
|
|
|
|
| 254 |
existing = DatasetDict()
|
| 255 |
print("π 기존 repo 없음 → 새로 생성")
|
| 256 |
|
| 257 |
+
def normalize_windows(record):
    """Normalize ``record["windows"]`` to a flat list of strings.

    The value stored under ``"windows"`` is rewritten as follows:

    * a dict becomes the ``str()`` of each of its values, in dict order;
    * a list is walked element by element: a dict element contributes the
      ``str()`` of each of its values, any other element contributes its
      own ``str()``;
    * anything else (including a missing key or ``None``) becomes ``[]``.

    Args:
        record: A dict-like object supporting ``.get`` and item assignment.

    Returns:
        The same ``record`` object; it is modified in place.
    """
    windows = record.get("windows")

    if isinstance(windows, dict):
        flattened = [str(value) for value in windows.values()]
    elif isinstance(windows, list):
        flattened = []
        for item in windows:
            # Decide per element rather than from the first element only, so
            # a list that mixes dicts and scalars neither raises on
            # ``.values()`` nor stringifies a whole dict.
            if isinstance(item, dict):
                flattened.extend(str(value) for value in item.values())
            else:
                flattened.append(str(item))
    else:
        flattened = []

    record["windows"] = flattened
    return record
|
| 270 |
+
|
| 271 |
# 현재 사용자만 업데이트
|
| 272 |
for user_id, records in user_data_groups.items():
|
| 273 |
try:
|
| 274 |
+
# 새 데이터 정규화
|
| 275 |
+
normalized = [normalize_windows(r) for r in records]
|
| 276 |
+
new_df = pd.DataFrame(normalized)
|
| 277 |
+
new_dataset = Dataset.from_pandas(new_df)
|
| 278 |
|
| 279 |
if user_id in existing:
|
| 280 |
+
# 기존 데이터 정규화 및 병합
|
| 281 |
old_df = existing[user_id].to_pandas()
|
| 282 |
+
old_df["windows"] = old_df["windows"].apply(
|
| 283 |
+
lambda w: [str(v) for v in w] if isinstance(w, list) else []
|
| 284 |
+
)
|
| 285 |
+
merged_df = pd.concat([old_df, new_df], ignore_index=True)
|
| 286 |
+
existing[user_id] = Dataset.from_pandas(merged_df)
|
| 287 |
+
print(f"π {user_id}: 기존 데이터와 병합 ({len(old_df)} + {len(new_df)} = {len(merged_df)}개 레코드)")
|
| 288 |
else:
|
| 289 |
existing[user_id] = new_dataset
|
| 290 |
+
print(f"π {user_id}: 신규 데이터 추가 ({len(new_df)}개 레코드)")
|
| 291 |
|
| 292 |
results[user_id] = {
|
| 293 |
"status": "success",
|