batch insert

- .gitignore            +0 -4
- Dockerfile            +3 -14
- app.py                +175 -69
- crontab               +0 -4
- last_push_date.txt    +0 -1
- start_with_cron.sh    +0 -25
- upload_hf_dataset.py  +0 -108
.gitignore
CHANGED

@@ -34,7 +34,3 @@ Thumbs.db
 
 # Logs
 *.log
-
-# Uploaded data
-uploads/
-training_data/
Dockerfile
CHANGED

@@ -6,7 +6,6 @@ WORKDIR /app
 # Update system packages and install the required packages
 RUN apt-get update && apt-get install -y \
     build-essential \
-    cron \
     && rm -rf /var/lib/apt/lists/*
 
 # Copy and install requirements

@@ -16,22 +15,12 @@ RUN pip install --no-cache-dir -r requirements.txt
 # Copy the application code
 COPY . .
 
-# Copy the cron config file
-COPY crontab /etc/cron.d/batch-push-cron
-
-# Set cron permissions
-RUN chmod 0644 /etc/cron.d/batch-push-cron
-RUN crontab /etc/cron.d/batch-push-cron
-
 # Create the log directory
-RUN mkdir -p /
+RUN mkdir -p /app/logs
 
 # Hugging Face Spaces use port 7860
 EXPOSE 7860
 
-#
-
-RUN chmod +x /start_with_cron.sh
-
-CMD ["/start_with_cron.sh"]
+# Run the FastAPI server directly (APScheduler included)
+CMD ["python", "start.py"]
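The new `CMD` drops the cron bootstrap entirely; per the added comment, scheduling now runs in-process via APScheduler inside `start.py`. That file is not part of this diff, so the following is only a sketch of what such an entry point could look like — the job name `push_batch_to_hub` and the midnight schedule (mirroring the deleted crontab below) are assumptions, not code from the repo:

```python
# Hypothetical start.py: serve the FastAPI app and schedule the daily batch
# push in-process with APScheduler, replacing the cron daemon in the old image.
import uvicorn
from apscheduler.schedulers.background import BackgroundScheduler
from apscheduler.triggers.cron import CronTrigger

from app import app  # the FastAPI instance defined in app.py


def push_batch_to_hub():
    # Placeholder daily job (assumed name); in this commit the real push
    # logic lives behind the /upload_batch_dataset endpoint instead.
    pass


if __name__ == "__main__":
    scheduler = BackgroundScheduler()
    # Mirrors the deleted crontab entry: once a day at midnight.
    scheduler.add_job(push_batch_to_hub, CronTrigger(hour=0, minute=0))
    scheduler.start()
    # Port 7860 matches the EXPOSE above (required by Hugging Face Spaces).
    uvicorn.run(app, host="0.0.0.0", port=7860)
```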
app.py
CHANGED

@@ -3,7 +3,6 @@ import json
 from typing import List, Optional
 from fastapi import FastAPI, HTTPException, Request
 from pydantic import BaseModel, Field, ConfigDict
-from typing import List
 import oracledb
 from dotenv import load_dotenv
 import json

@@ -56,6 +55,24 @@ class StatePayload(BaseModel):
     user_emb: Optional[List[float]] = Field(default=None, description="length=12")
     model_version: Optional[str] = None
 
+# Schema for batch data
+class BatchDataItem(BaseModel):
+    user_id: str
+    session_id: str
+    measure_date: str
+    rms: float
+    freq: float
+    fatigue: float
+    mode: str
+    window_count: int
+    windows: List[dict] = Field(default_factory=list)
+    measurement_count: int
+
+class BatchUploadPayload(BaseModel):
+    batch_data: List[BatchDataItem]
+    batch_size: int
+    batch_date: str
+
 # ----- Utilities -----
 def clob_json(obj) -> str:

@@ -74,7 +91,7 @@ def root():
         "health_db": "/health/db (DB connection check)",
         "docs": "/docs",
         "upload_state": "/upload_state",
-        "
+        "upload_batch_dataset": "/upload_batch_dataset (batch data)",
         "user_dataset": "/user_dataset/{user_id}"
     }
 }

@@ -146,91 +163,180 @@ def upload_state(p: StatePayload):
     except Exception as e:
         raise HTTPException(500, f"upload_state failed: {e}")
 
-
-
-
-"
+@app.on_event("startup")
+async def startup_event():
+    """Initialize on server startup"""
+    print("🚀 Starting the MuscleCare API server...")
+
+    # Create the log directory (local vs. deployed environment)
+    log_dir = "/app/logs" if os.path.exists("/app") else "./logs"
+    os.makedirs(log_dir, exist_ok=True)
+    print(f"📁 Log directory created: {log_dir}")
+
+    # Initialize the Oracle DB
     try:
-        # Create the local data directory
-        data_dir = "user_data"
-        os.makedirs(data_dir, exist_ok=True)
-
-        # Per-user JSON file path
-        user_file = os.path.join(data_dir, f"{user_id}.json")
-
-        # Load the existing data
-        existing_data = []
-        if os.path.exists(user_file):
-            try:
-                with open(user_file, 'r', encoding='utf-8') as f:
-                    existing_data = json.load(f)
-                print(f"📂 Existing data loaded: {user_id} ({len(existing_data)} records)")
-            except:
-                existing_data = []
-
-        # Append the new data
-        existing_data.append(data)
-
-        # Save the file
-        with open(user_file, 'w', encoding='utf-8') as f:
-            json.dump(existing_data, f, ensure_ascii=False, indent=2)
-
-        print(f"✅ Local file saved: {user_id} ({len(existing_data)} records)")
-        return {
-            "user_id": user_id,
-            "rows": len(existing_data),
-            "status": "success",
-            "filename": f"{user_id}.json",
-            "file_path": user_file,
-            "message": f"Data saved to local file: {user_file}"
-        }
+        db_initialized = init_db_from_env()
+        if db_initialized:
+            print("✅ Oracle DB connected")
+        else:
+            print("⚠️ Oracle DB connection failed - DB-dependent features are disabled")
+    except Exception as e:
+        print(f"❌ Oracle DB initialization error: {e}")
+
+    print("✅ Server startup complete")
+
+@app.on_event("shutdown")
+async def shutdown_event():
+    """Clean up on server shutdown"""
+    print("🛑 Shutting down the server...")
+    try:
+        db_manager = get_db_manager()
+        db_manager.close()
+        print("✅ Oracle DB connection closed")
     except Exception as e:
-        print(f"❌
+        print(f"❌ Shutdown cleanup error: {e}")
+
 
 @app.get("/user_dataset/{user_id}")
 async def read_user_dataset(user_id: str):
-    """
+    """Fetch user data from the Hugging Face Hub"""
     try:
-        #
+        # Check the Hugging Face environment variables
+        hf_repo_id = os.getenv("HF_DATA_REPO_ID")
+        hf_token = os.getenv("HF_DATA_TOKEN")
+
+        if not hf_repo_id or not hf_token:
+            raise HTTPException(status_code=500, detail="Hugging Face configuration required (HF_DATA_REPO_ID, HF_DATA_TOKEN)")
 
+        # Load the user's data from the Hugging Face Hub
+        try:
+            dataset = load_dataset(hf_repo_id, split=user_id, token=hf_token)
+            data = dataset.to_pandas().to_dict(orient="records")
+
+            # Return the 5 most recent records
+            recent_data = data[-5:] if len(data) > 5 else data
+
+            return {
+                "user_id": user_id,
+                "count": len(data),
+                "recent_data": recent_data,
+                "filename": f"{user_id}.parquet",
+                "source": "huggingface_hub",
+                "repo_id": hf_repo_id
+            }
+
+        except Exception as e:
+            # No data for this user
             return {
                 "user_id": user_id,
                 "count": 0,
                 "recent_data": [],
-                "source": "
+                "source": "huggingface_hub",
+                "repo_id": hf_repo_id,
                 "message": "No data found"
             }
 
-
-
+    except HTTPException:
+        raise
+    except Exception as e:
+        print(f"❌ Hugging Face Hub lookup failed: {e}")
+        raise HTTPException(status_code=500, detail=f"Hugging Face Hub lookup failed: {str(e)}")
+
+@app.post("/upload_batch_dataset")
+async def upload_batch_dataset(payload: BatchUploadPayload):
+    """Push user data to the Hugging Face Hub in batches"""
+    try:
+        # Check the Hugging Face environment variables
+        hf_repo_id = os.getenv("HF_DATA_REPO_ID")
+        hf_token = os.getenv("HF_DATA_TOKEN")
+
+        if not hf_repo_id or not hf_token:
+            raise HTTPException(status_code=500, detail="Hugging Face configuration required (HF_DATA_REPO_ID, HF_DATA_TOKEN)")
+
+        # Group the data by user
+        user_data_groups = {}
+        for item in payload.batch_data:
+            user_id = item.user_id
+            if user_id not in user_data_groups:
+                user_data_groups[user_id] = []
+
+            # Convert the item into a record
+            record = {
+                "session_id": item.session_id,
+                "measure_date": item.measure_date,
+                "rms": item.rms,
+                "freq": item.freq,
+                "fatigue": item.fatigue,
+                "mode": item.mode,
+                "window_count": item.window_count,
+                "windows": item.windows,
+                "measurement_count": item.measurement_count,
+                "batch_date": payload.batch_date,
+                "batch_size": payload.batch_size,
+                "timestamp": datetime.now().isoformat()
+            }
+            user_data_groups[user_id].append(record)
+
+        results = {}
+
+        # Load every split currently in the repo
+        try:
+            existing = load_dataset(hf_repo_id, token=hf_token)
+            all_splits = list(existing.keys())
+            print(f"📂 Existing splits: {all_splits}")
+        except Exception:
+            existing = DatasetDict()
+            print("📂 No existing repo → creating a new one")
+
+        # Update only the users in this batch
+        for user_id, records in user_data_groups.items():
+            try:
+                df = pd.DataFrame(records)
+                new_dataset = Dataset.from_pandas(df)
+
+                if user_id in existing:
+                    # Merge with the existing dataframe
+                    old_df = existing[user_id].to_pandas()
+                    merged = pd.concat([old_df, df], ignore_index=True)
+                    existing[user_id] = Dataset.from_pandas(merged)
+                    print(f"📊 {user_id}: merged with existing data ({len(old_df)} + {len(df)} = {len(merged)} records)")
+                else:
+                    existing[user_id] = new_dataset
+                    print(f"📊 {user_id}: added as new data ({len(df)} records)")
+
+                results[user_id] = {
+                    "status": "success",
+                    "new_rows": len(records),
+                    "filename": f"{user_id}.parquet"
+                }
+
+            except Exception as e:
+                print(f"❌ {user_id} processing failed: {e}")
+                results[user_id] = {
+                    "status": "failed",
+                    "error": str(e)
+                }
+
+        # Push every split back in one shot
+        try:
+            existing.push_to_hub(hf_repo_id, token=hf_token, private=True)
+            print(f"✅ Full DatasetDict push complete: {len(existing)} users")
+        except Exception as e:
+            print(f"❌ Full push failed: {e}")
+            raise HTTPException(status_code=500, detail=f"Full push failed: {str(e)}")
+
         return {
-            "
-            "
-            "
-            "
-            "
+            "batch_date": payload.batch_date,
+            "batch_size": payload.batch_size,
+            "processed_users": len(user_data_groups),
+            "results": results,
+            "repo_id": hf_repo_id,
+            "message": f"Batch upload completed for {len(user_data_groups)} users"
         }
+
+    except HTTPException:
+        raise
     except Exception as e:
-        print(f"❌
-        raise HTTPException(status_code=500, detail=f"
+        print(f"❌ Batch push failed: {e}")
+        raise HTTPException(status_code=500, detail=f"Batch push failed: {str(e)}")
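The new flow: `/upload_batch_dataset` groups incoming items by `user_id`, pulls every existing split from the Hub, merges the new records per user, and re-pushes the whole `DatasetDict` in a single `push_to_hub` call, so untouched users' splits survive each push. A hypothetical client call matching the `BatchUploadPayload` schema above (the base URL and all field values are illustrative):

```python
# Illustrative request against the new /upload_batch_dataset endpoint.
# BASE_URL and every field value are made up; only the shape follows the
# BatchDataItem / BatchUploadPayload models in the diff above.
import requests

BASE_URL = "http://localhost:7860"  # assumed local run; Spaces serve on 7860

payload = {
    "batch_data": [
        {
            "user_id": "user_001",
            "session_id": "sess_42",
            "measure_date": "2025-10-24",
            "rms": 0.31,
            "freq": 57.2,
            "fatigue": 0.12,
            "mode": "training",
            "window_count": 2,
            "windows": [{"rms": 0.30}, {"rms": 0.32}],
            "measurement_count": 120,
        }
    ],
    "batch_size": 1,
    "batch_date": "2025-10-24",
}

resp = requests.post(f"{BASE_URL}/upload_batch_dataset", json=payload)
resp.raise_for_status()
# Per-user outcome, e.g. {"user_001": {"status": "success", "new_rows": 1, ...}}
print(resp.json()["results"])
```

The trade-off of this read-modify-write design is that every batch re-downloads and re-uploads the full repo; in exchange it keeps the per-user parquet splits consistent without needing append support on the Hub.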
crontab
DELETED

@@ -1,4 +0,0 @@
-# Run the batch push every day at midnight
-0 0 * * * cd /app && python upload_hF_dataset.py >> /var/log/batch_push.log 2>&1
-
-# Blank line required (cron requirement)
last_push_date.txt
DELETED

@@ -1 +0,0 @@
-2025-10-23
start_with_cron.sh
DELETED

@@ -1,25 +0,0 @@
-#!/bin/bash
-
-# Create the log directory
-mkdir -p /var/log
-
-# Start the cron service
-service cron start
-
-# Check the cron status
-echo "📅 Cron service started"
-crontab -l
-
-# Start the FastAPI server (in the background)
-echo "🚀 Starting the FastAPI server..."
-python start.py &
-
-# Wait for the server to come up
-sleep 5
-
-# Server startup complete
-echo "✅ FastAPI server started"
-
-# Log monitoring (optional)
-echo "📊 Starting batch-push log monitoring..."
-tail -f /var/log/batch_push.log &
upload_hf_dataset.py
DELETED

@@ -1,108 +0,0 @@
-from datasets import Dataset, DatasetDict
-from datetime import datetime, date
-import pandas as pd, glob, json, os, shutil
-from dotenv import load_dotenv
-
-load_dotenv()
-
-HF_DATA_REPO_ID = os.getenv("HF_DATA_REPO_ID")
-HF_DATA_TOKEN = os.getenv("HF_DATA_TOKEN")
-CACHE_DIR = "./user_data"
-BACKUP_DIR = "./backup"
-LAST_PUSH_FILE = "./last_push_date.txt"
-
-def now_str():
-    return datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-
-def get_last_push_date():
-    """Return the date of the last push"""
-    if os.path.exists(LAST_PUSH_FILE):
-        with open(LAST_PUSH_FILE, "r") as f:
-            return f.read().strip()
-    return None
-
-def update_last_push_date():
-    """Record the date of the last push"""
-    with open(LAST_PUSH_FILE, "w") as f:
-        f.write(str(date.today()))
-
-def should_push_today():
-    """Check whether a push is due today"""
-    last_push = get_last_push_date()
-    today = str(date.today())
-    return last_push != today
-
-def batch_push_to_huggingface():
-    """Upload to the Hugging Face dataset once a day"""
-    # Check the required environment variables
-    if not HF_DATA_REPO_ID or not HF_DATA_TOKEN:
-        print(f"❌ {now_str()} - HF_DATA_REPO_ID or HF_DATA_TOKEN is not set.")
-        return
-
-    # Check whether a push is due
-    if not should_push_today():
-        print(f"📅 {now_str()} - Already pushed today. Exiting.")
-        return
-
-    files = glob.glob(os.path.join(CACHE_DIR, "*.json"))
-    if not files:
-        print(f"📂 {now_str()} - No cached files. Exiting.")
-        return
-
-    print(f"🚀 {now_str()} - Starting batch push ({len(files)} files)")
-
-    user_splits = {}
-    for path in files:
-        user_id = os.path.basename(path).split(".")[0]
-        try:
-            with open(path, "r", encoding="utf-8") as f:
-                records = json.load(f)
-            if not records:
-                print(f"⚠️ {user_id}: empty file, skipping")
-                continue
-
-            df = pd.DataFrame(records)
-            user_splits[user_id] = Dataset.from_pandas(df)
-            print(f"📊 {user_id}: converted {len(records)} records")
-
-        except Exception as e:
-            print(f"❌ {user_id}: failed to load file → {e}")
-            continue
-
-    if not user_splits:
-        print(f"❌ {now_str()} - No data to process. Exiting.")
-        return
-
-    # Create the backup directory
-    os.makedirs(BACKUP_DIR, exist_ok=True)
-    backup_path = os.path.join(BACKUP_DIR, date.today().isoformat())
-    shutil.copytree(CACHE_DIR, backup_path, dirs_exist_ok=True)
-    print(f"🗂️ {now_str()} - Data backup complete → {backup_path}")
-
-    try:
-        dataset_dict = DatasetDict(user_splits)
-        dataset_dict.push_to_hub(HF_DATA_REPO_ID, token=HF_DATA_TOKEN, private=True)
-        print(f"✅ {now_str()} - Pushed to the Hugging Face Hub ({len(user_splits)} users) → {HF_DATA_REPO_ID}")
-
-        # Clean the cache after a successful push
-        shutil.rmtree(CACHE_DIR, ignore_errors=True)
-        print(f"🗑️ {now_str()} - user_data directory removed")
-
-        update_last_push_date()
-        print(f"📅 {now_str()} - Last push date updated")
-
-    except Exception as e:
-        print(f"❌ {now_str()} - Push failed: {e}")
-        print(f"⚠️ {now_str()} - Keeping the cache (to prevent data loss)")
-        # Keep the cache on failure
-        return
-
-def main():
-    """CLI/cron entry point"""
-    try:
-        batch_push_to_huggingface()
-    except Exception as e:
-        print(f"💥 {now_str()} - Unexpected error: {e}")
-
-if __name__ == "__main__":
-    main()