Merry99 committed on
Commit
9668c3f
·
1 Parent(s): 99fe5a3

fix: log data field

Browse files
Files changed (1) hide show
  1. app.py +57 -140
app.py CHANGED
@@ -1,12 +1,10 @@
1
  import os
2
  import json
3
  from typing import List, Optional
4
- from fastapi import FastAPI, HTTPException, Request
5
  from pydantic import BaseModel, Field, ConfigDict
6
  import oracledb
7
  from dotenv import load_dotenv
8
- import json
9
- import requests
10
  import pandas as pd
11
  from datetime import datetime
12
  from datasets import Dataset, DatasetDict, load_dataset
@@ -54,23 +52,20 @@ class StatePayload(BaseModel):
54
  user_emb: Optional[List[float]] = Field(default=None, description="length=12")
55
  model_version: Optional[str] = None
56
 
57
- # ๋ฐฐ์น˜ ๋ฐ์ดํ„ฐ์šฉ ์Šคํ‚ค๋งˆ
58
- class BatchDataItem(BaseModel):
59
  user_id: str
60
  session_id: str
61
  measure_date: str
62
  rms: float
63
  freq: float
64
  fatigue: float
 
 
 
65
  mode: str
66
  window_count: int
67
  measurement_count: int
68
 
69
- class BatchUploadPayload(BaseModel):
70
- batch_data: List[BatchDataItem]
71
- batch_size: int
72
- batch_date: str
73
-
74
 
75
  # ----- ์œ ํ‹ธ -----
76
  def clob_json(obj) -> str:
@@ -89,7 +84,7 @@ def root():
89
  "health_db": "/health/db (DB ์—ฐ๊ฒฐ ์ฒดํฌ)",
90
  "docs": "/docs",
91
  "upload_state": "/upload_state",
92
- "upload_logs": "/upload_logs (๋ฐฐ์น˜ ๋ฐ์ดํ„ฐ)",
93
  "user_dataset": "/user_dataset/{user_id}"
94
  }
95
  }
@@ -162,54 +157,9 @@ def upload_state(p: StatePayload):
162
  raise HTTPException(500, f"upload_state failed: {e}")
163
 
164
 
165
- @app.get("/user_dataset/{user_id}")
166
- async def read_user_dataset(user_id: str):
167
- """Hugging Face Hub์—์„œ ์‚ฌ์šฉ์ž ๋ฐ์ดํ„ฐ ์กฐํšŒ"""
168
- try:
169
- # Hugging Face ํ™˜๊ฒฝ๋ณ€์ˆ˜ ํ™•์ธ
170
- hf_repo_id = os.getenv("HF_DATA_REPO_ID")
171
- hf_token = os.getenv("HF_DATA_TOKEN")
172
-
173
- if not hf_repo_id or not hf_token:
174
- raise HTTPException(status_code=500, detail="Hugging Face ์„ค์ •์ด ํ•„์š”ํ•ฉ๋‹ˆ๋‹ค (HF_DATA_REPO_ID, HF_DATA_TOKEN)")
175
-
176
- # Hugging Face Hub์—์„œ ์‚ฌ์šฉ์ž ๋ฐ์ดํ„ฐ ๋กœ๋“œ
177
- try:
178
- dataset = load_dataset(hf_repo_id, split=user_id, token=hf_token)
179
- data = dataset.to_pandas().to_dict(orient="records")
180
-
181
- # ์ตœ๊ทผ 5๊ฐœ ๋ ˆ์ฝ”๋“œ ๋ฐ˜ํ™˜
182
- recent_data = data[-5:] if len(data) > 5 else data
183
-
184
- return {
185
- "user_id": user_id,
186
- "count": len(data),
187
- "recent_data": recent_data,
188
- "filename": f"{user_id}.parquet",
189
- "source": "huggingface_hub",
190
- "repo_id": hf_repo_id
191
- }
192
-
193
- except Exception as e:
194
- # ๋ฐ์ดํ„ฐ๊ฐ€ ์—†๋Š” ๊ฒฝ์šฐ
195
- return {
196
- "user_id": user_id,
197
- "count": 0,
198
- "recent_data": [],
199
- "source": "huggingface_hub",
200
- "repo_id": hf_repo_id,
201
- "message": "No data found"
202
- }
203
-
204
- except HTTPException:
205
- raise
206
- except Exception as e:
207
- print(f"โŒ Hugging Face Hub ์กฐํšŒ ์‹คํŒจ: {e}")
208
- raise HTTPException(status_code=500, detail=f"Hugging Face Hub ์กฐํšŒ ์‹คํŒจ: {str(e)}")
209
-
210
  @app.post("/upload_logs")
211
- async def upload_logs(payload: BatchUploadPayload):
212
- """๋ฐฐ์น˜ ๋‹จ์œ„๋กœ ์‚ฌ์šฉ์ž ๋ฐ์ดํ„ฐ๋ฅผ Hugging Face Hub๋กœ ํ‘ธ์‹œ"""
213
  try:
214
  # Hugging Face ํ™˜๊ฒฝ๋ณ€์ˆ˜ ํ™•์ธ
215
  hf_repo_id = os.getenv("HF_DATA_REPO_ID")
@@ -218,102 +168,69 @@ async def upload_logs(payload: BatchUploadPayload):
218
  if not hf_repo_id or not hf_token:
219
  raise HTTPException(status_code=500, detail="Hugging Face ์„ค์ •์ด ํ•„์š”ํ•ฉ๋‹ˆ๋‹ค (HF_DATA_REPO_ID, HF_DATA_TOKEN)")
220
 
221
- # ์‚ฌ์šฉ์ž๋ณ„๋กœ ๋ฐ์ดํ„ฐ ๊ทธ๋ฃนํ™”
222
- user_data_groups = {}
223
- for item in payload.batch_data:
224
- user_id = item.user_id
225
- if user_id not in user_data_groups:
226
- user_data_groups[user_id] = []
227
-
228
- # ๋ฐ์ดํ„ฐ ๋ณ€ํ™˜
229
- record = {
230
- "session_id": item.session_id,
231
- "measure_date": item.measure_date,
232
- "rms": item.rms,
233
- "freq": item.freq,
234
- "fatigue": item.fatigue,
235
- "mode": item.mode,
236
- "window_count": item.window_count,
237
- "measurement_count": item.measurement_count,
238
- "batch_date": payload.batch_date,
239
- "batch_size": payload.batch_size,
240
- "timestamp": datetime.now().isoformat()
241
- }
242
- user_data_groups[user_id].append(record)
243
 
244
- results = {}
245
-
246
- # ํ˜„์žฌ repo์— ์žˆ๋Š” ๋ชจ๋“  split ๋ถˆ๋Ÿฌ์˜ค๊ธฐ
247
  try:
248
  existing = load_dataset(hf_repo_id, token=hf_token)
249
- all_splits = list(existing.keys())
250
- print(f"๐Ÿ“‚ ๊ธฐ์กด splits: {all_splits}")
251
-
252
- # ๊ธฐ์กด ๋ฐ์ดํ„ฐ๋ฅผ ์™„์ „ํžˆ ์ƒˆ๋กœ ์ƒ์„ฑ (์Šคํ‚ค๋งˆ ํ†ต์ผ)
253
- new_existing = DatasetDict()
254
- for user_id in existing.keys():
255
- df = existing[user_id].to_pandas()
256
- # ๋ชจ๋“  ๋ฐ์ดํ„ฐ๋ฅผ ์ƒˆ๋กœ ์ƒ์„ฑํ•˜์—ฌ ์Šคํ‚ค๋งˆ ํ†ต์ผ
257
- new_existing[user_id] = df_to_dataset(df)
258
- print(f"๐Ÿ”ง {user_id}: ๊ธฐ์กด ๋ฐ์ดํ„ฐ ์žฌ์ƒ์„ฑ ์™„๋ฃŒ")
259
- existing = new_existing
260
-
261
  except Exception:
262
  existing = DatasetDict()
263
  print("๐Ÿ“‚ ๊ธฐ์กด repo ์—†์Œ โ†’ ์ƒˆ๋กœ ์ƒ์„ฑ")
264
 
265
- # ํ˜„์žฌ ์‚ฌ์šฉ์ž๋งŒ ์—…๋ฐ์ดํŠธ
266
- for user_id, records in user_data_groups.items():
267
- try:
268
- # ์ƒˆ ๋ฐ์ดํ„ฐ ์ฒ˜๋ฆฌ
269
- new_df = pd.DataFrame(records)
270
- new_dataset = df_to_dataset(new_df)
271
-
272
- if user_id in existing:
273
- # ๊ธฐ์กด ๋ฐ์ดํ„ฐ์™€ ๋ณ‘ํ•ฉ
274
- old_df = existing[user_id].to_pandas()
275
- merged_df = pd.concat([old_df, new_df], ignore_index=True)
276
- existing[user_id] = df_to_dataset(merged_df)
277
- print(f"๐Ÿ“Š {user_id}: ๊ธฐ์กด ๋ฐ์ดํ„ฐ์™€ ๋ณ‘ํ•ฉ ({len(old_df)} + {len(new_df)} = {len(merged_df)}๊ฐœ ๋ ˆ์ฝ”๋“œ)")
278
- else:
279
- existing[user_id] = new_dataset
280
- print(f"๐Ÿ“Š {user_id}: ์‹ ๊ทœ ๋ฐ์ดํ„ฐ ์ถ”๊ฐ€ ({len(new_df)}๊ฐœ ๋ ˆ์ฝ”๋“œ)")
281
-
282
- results[user_id] = {
283
- "status": "success",
284
- "new_rows": len(records),
285
- "filename": f"{user_id}.parquet"
286
- }
287
-
288
- except Exception as e:
289
- print(f"โŒ {user_id} ์ฒ˜๋ฆฌ ์‹คํŒจ: {e}")
290
- results[user_id] = {
291
- "status": "failed",
292
- "error": str(e)
293
- }
294
-
295
- # ๋ชจ๋“  split ํ†ต์งธ๋กœ ๋‹ค์‹œ push
296
  try:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
297
  existing.push_to_hub(hf_repo_id, token=hf_token, private=True)
298
- print(f"โœ… ์ „์ฒด DatasetDict ํ‘ธ์‹œ ์™„๋ฃŒ: {len(existing)}๊ฐœ ์‚ฌ์šฉ์ž")
299
- except Exception as e:
300
- print(f"โŒ ์ „์ฒด ํ‘ธ์‹œ ์‹คํŒจ: {e}")
301
- raise HTTPException(status_code=500, detail=f"์ „์ฒด ํ‘ธ์‹œ ์‹คํŒจ: {str(e)}")
302
 
303
- return {
304
- "batch_date": payload.batch_date,
305
- "batch_size": payload.batch_size,
306
- "processed_users": len(user_data_groups),
307
- "results": results,
308
- "repo_id": hf_repo_id,
309
- "message": f"Batch upload completed for {len(user_data_groups)} users"
310
- }
 
 
 
 
311
 
312
  except HTTPException:
313
  raise
314
  except Exception as e:
315
- print(f"โŒ ๋ฐฐ์น˜ ํ‘ธ์‹œ ์‹คํŒจ: {e}")
316
- raise HTTPException(status_code=500, detail=f"๋ฐฐ์น˜ ํ‘ธ์‹œ ์‹คํŒจ: {str(e)}")
317
 
318
 
319
  def df_to_dataset(df):
 
1
  import os
2
  import json
3
  from typing import List, Optional
4
+ from fastapi import FastAPI, HTTPException
5
  from pydantic import BaseModel, Field, ConfigDict
6
  import oracledb
7
  from dotenv import load_dotenv
 
 
8
  import pandas as pd
9
  from datetime import datetime
10
  from datasets import Dataset, DatasetDict, load_dataset
 
52
  user_emb: Optional[List[float]] = Field(default=None, description="length=12")
53
  model_version: Optional[str] = None
54
 
55
+ class LogUploadPayload(BaseModel):
 
56
  user_id: str
57
  session_id: str
58
  measure_date: str
59
  rms: float
60
  freq: float
61
  fatigue: float
62
+ rms_base: Optional[float] = None
63
+ freq_base: Optional[float] = None
64
+ user_emb: Optional[List[float]] = Field(default=None, description="length=12")
65
  mode: str
66
  window_count: int
67
  measurement_count: int
68
 
 
 
 
 
 
69
 
70
  # ----- ์œ ํ‹ธ -----
71
  def clob_json(obj) -> str:
 
84
  "health_db": "/health/db (DB ์—ฐ๊ฒฐ ์ฒดํฌ)",
85
  "docs": "/docs",
86
  "upload_state": "/upload_state",
87
+ "upload_logs": "/upload_logs (๊ฐœ๋ณ„ ๋กœ๊ทธ ๋ฐ์ดํ„ฐ)",
88
  "user_dataset": "/user_dataset/{user_id}"
89
  }
90
  }
 
157
  raise HTTPException(500, f"upload_state failed: {e}")
158
 
159
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
160
  @app.post("/upload_logs")
161
+ async def upload_logs(payload: LogUploadPayload):
162
+ """๊ฐœ๋ณ„ ๋กœ๊ทธ ๋ฐ์ดํ„ฐ๋ฅผ Hugging Face Hub๋กœ ํ‘ธ์‹œ"""
163
  try:
164
  # Hugging Face ํ™˜๊ฒฝ๋ณ€์ˆ˜ ํ™•์ธ
165
  hf_repo_id = os.getenv("HF_DATA_REPO_ID")
 
168
  if not hf_repo_id or not hf_token:
169
  raise HTTPException(status_code=500, detail="Hugging Face ์„ค์ •์ด ํ•„์š”ํ•ฉ๋‹ˆ๋‹ค (HF_DATA_REPO_ID, HF_DATA_TOKEN)")
170
 
171
+ # ๋‹จ์ผ ๋ ˆ์ฝ”๋“œ ์ƒ์„ฑ
172
+ record = {
173
+ "session_id": payload.session_id,
174
+ "measure_date": payload.measure_date,
175
+ "rms": payload.rms,
176
+ "freq": payload.freq,
177
+ "fatigue": payload.fatigue,
178
+ "rms_base": payload.rms_base,
179
+ "freq_base": payload.freq_base,
180
+ "user_emb": payload.user_emb,
181
+ "mode": payload.mode,
182
+ "window_count": payload.window_count,
183
+ "measurement_count": payload.measurement_count,
184
+ "timestamp": datetime.now().isoformat()
185
+ }
 
 
 
 
 
 
 
186
 
187
+ # ํ˜„์žฌ repo์— ์žˆ๋Š” ๋ฐ์ดํ„ฐ ๋ถˆ๋Ÿฌ์˜ค๊ธฐ
 
 
188
  try:
189
  existing = load_dataset(hf_repo_id, token=hf_token)
190
+ print(f"๐Ÿ“‚ ๊ธฐ์กด ๋ฐ์ดํ„ฐ ๋กœ๋“œ ์™„๋ฃŒ")
 
 
 
 
 
 
 
 
 
 
 
191
  except Exception:
192
  existing = DatasetDict()
193
  print("๐Ÿ“‚ ๊ธฐ์กด repo ์—†์Œ โ†’ ์ƒˆ๋กœ ์ƒ์„ฑ")
194
 
195
+ # ์‚ฌ์šฉ์ž ๋ฐ์ดํ„ฐ ์ฒ˜๋ฆฌ
196
+ user_id = payload.user_id
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
197
  try:
198
+ # ์ƒˆ ๋ฐ์ดํ„ฐ ์ฒ˜๋ฆฌ
199
+ new_df = pd.DataFrame([record])
200
+ new_dataset = df_to_dataset(new_df)
201
+
202
+ if user_id in existing:
203
+ # ๊ธฐ์กด ๋ฐ์ดํ„ฐ์™€ ๋ณ‘ํ•ฉ
204
+ old_df = existing[user_id].to_pandas()
205
+ merged_df = pd.concat([old_df, new_df], ignore_index=True)
206
+ existing[user_id] = df_to_dataset(merged_df)
207
+ print(f"๐Ÿ“Š {user_id}: ๊ธฐ์กด ๋ฐ์ดํ„ฐ์™€ ๋ณ‘ํ•ฉ ({len(old_df)} + 1 = {len(merged_df)}๊ฐœ ๋ ˆ์ฝ”๋“œ)")
208
+ else:
209
+ existing[user_id] = new_dataset
210
+ print(f"๐Ÿ“Š {user_id}: ์‹ ๊ทœ ๋ฐ์ดํ„ฐ ์ถ”๊ฐ€ (1๊ฐœ ๋ ˆ์ฝ”๋“œ)")
211
+
212
+ # ๋ฐ์ดํ„ฐ ํ‘ธ์‹œ
213
  existing.push_to_hub(hf_repo_id, token=hf_token, private=True)
214
+ print(f"โœ… {user_id} ๋ฐ์ดํ„ฐ ํ‘ธ์‹œ ์™„๋ฃŒ")
 
 
 
215
 
216
+ return {
217
+ "user_id": user_id,
218
+ "status": "success",
219
+ "new_rows": 1,
220
+ "filename": f"{user_id}.parquet",
221
+ "repo_id": hf_repo_id,
222
+ "message": f"Log uploaded successfully for user {user_id}"
223
+ }
224
+
225
+ except Exception as e:
226
+ print(f"โŒ {user_id} ์ฒ˜๋ฆฌ ์‹คํŒจ: {e}")
227
+ raise HTTPException(status_code=500, detail=f"๋ฐ์ดํ„ฐ ์ฒ˜๋ฆฌ ์‹คํŒจ: {str(e)}")
228
 
229
  except HTTPException:
230
  raise
231
  except Exception as e:
232
+ print(f"โŒ ๋กœ๊ทธ ์—…๋กœ๋“œ ์‹คํŒจ: {e}")
233
+ raise HTTPException(status_code=500, detail=f"๋กœ๊ทธ ์—…๋กœ๋“œ ์‹คํŒจ: {str(e)}")
234
 
235
 
236
  def df_to_dataset(df):