"""Retrieval API over a Chroma vector store.

On import, the module makes sure a local copy of the Chroma database exists
(downloading it from a Hugging Face dataset repo when it does not), loads the
embedding model, opens the store, and exposes it through a FastAPI app.

Environment variables:
    HF_REPO_ID: dataset repo to download the database from (required only
        when the local database directory is missing).
    HF_TOKEN: access token for the repo (optional; omit for public repos).
"""
import os
import shutil

from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from huggingface_hub import snapshot_download
from langchain_chroma import Chroma
from langchain_huggingface import HuggingFaceEmbeddings
from pydantic import BaseModel, Field

app = FastAPI()

# CORS — allow every origin (including Claude artifacts).
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

DB_LOCAL_PATH = "./chroma_db"
HF_DOWNLOAD_DIR = "./hf_data"


def _ensure_local_db() -> None:
    """Download the database into ``DB_LOCAL_PATH`` unless it already exists.

    Raises:
        KeyError: if ``HF_REPO_ID`` is not set and a download is needed.
    """
    if os.path.exists(DB_LOCAL_PATH):
        return

    print("🔄 HuggingFace에서 DB 다운로드 중...")
    snapshot_download(
        repo_id=os.environ["HF_REPO_ID"],
        repo_type="dataset",
        # The token is optional so public dataset repos work without it;
        # an empty value is treated the same as "not set".
        token=os.environ.get("HF_TOKEN") or None,
        local_dir=HF_DOWNLOAD_DIR,
    )

    # The repo may hold the database in a "chroma_db" subfolder or at its root.
    nested = os.path.join(HF_DOWNLOAD_DIR, "chroma_db")
    src = nested if os.path.exists(nested) else HF_DOWNLOAD_DIR

    # Copy into a staging directory and rename once complete. A copy that
    # fails midway would otherwise leave a partial DB_LOCAL_PATH behind, and
    # the existence check above would skip the download on the next start.
    staging = DB_LOCAL_PATH + ".partial"
    shutil.rmtree(staging, ignore_errors=True)
    shutil.copytree(src, staging)
    os.replace(staging, DB_LOCAL_PATH)
    print("✅ DB 다운로드 완료")


_ensure_local_db()

print("🔄 임베딩 모델 로딩 중...")
embeddings = HuggingFaceEmbeddings(model_name="jhgan/ko-sroberta-multitask")
db = Chroma(persist_directory=DB_LOCAL_PATH, embedding_function=embeddings)


def _chunk_count() -> int:
    """Return the number of entries in the underlying Chroma collection."""
    # NOTE(review): relies on the private ``_collection`` attribute of the
    # langchain Chroma wrapper — re-check when upgrading langchain_chroma.
    return db._collection.count()


print(f"✅ DB 로드 완료 — 청크 수: {_chunk_count()}")


class QueryRequest(BaseModel):
    """Request body for ``POST /retrieve``."""

    # Search text passed to the similarity search.
    query: str
    # Number of documents to return; must be at least 1 so that a zero or
    # negative value is rejected as a validation error before the search runs.
    k: int = Field(default=5, ge=1)


@app.get("/")
def root():
    """Report service status and the current chunk count."""
    return {"status": "ok", "chunks": _chunk_count()}


@app.get("/health")
def health():
    """Health check; returns the same payload shape as ``/``."""
    return {"status": "ok", "chunks": _chunk_count()}


@app.post("/retrieve")
def retrieve(req: QueryRequest):
    """Return the ``req.k`` documents most similar to ``req.query``.

    Each result carries the document text plus its ``source`` and ``region``
    metadata; a missing metadata key falls back to the "알 수 없음" placeholder.
    """
    docs = db.similarity_search(req.query, k=req.k)
    return {
        "documents": [
            {
                "text": doc.page_content,
                "source": doc.metadata.get("source", "알 수 없음"),
                "region": doc.metadata.get("region", "알 수 없음"),
            }
            for doc in docs
        ]
    }