# Hugging Face Spaces page header (status: Running)
| from fastapi import FastAPI | |
| from fastapi.middleware.cors import CORSMiddleware | |
| from pydantic import BaseModel | |
| from langchain_huggingface import HuggingFaceEmbeddings | |
| from langchain_chroma import Chroma | |
| from huggingface_hub import snapshot_download | |
| import os | |
| import shutil | |
# ASGI application object; the middleware below is attached to it.
app = FastAPI()

# CORS: allow requests from every origin. The original (mis-encoded) comment
# appears to mention Claude artifacts as an intended client -- TODO confirm.
# Credentials are disabled; every method and header is accepted.
_CORS_SETTINGS = {
    "allow_origins": ["*"],
    "allow_credentials": False,
    "allow_methods": ["*"],
    "allow_headers": ["*"],
}
app.add_middleware(CORSMiddleware, **_CORS_SETTINGS)
# Directory the Chroma store is loaded from.
DB_LOCAL_PATH = "./chroma_db"

# NOTE(review): the log strings in this section look mis-encoded (presumably
# Korean text decoded with the wrong charset). They are runtime output and are
# left untouched here -- confirm against the original file.
if not os.path.exists(DB_LOCAL_PATH):
    # No local copy yet: download the dataset repo named by HF_REPO_ID.
    # Both HF_REPO_ID and HF_TOKEN must be set, otherwise a KeyError is raised.
    print("π HuggingFaceμμ DB λ€μ΄λ‘λ μ€...")
    snapshot_download(
        repo_id=os.environ["HF_REPO_ID"],
        repo_type="dataset",
        token=os.environ["HF_TOKEN"],
        local_dir="./hf_data",
    )
    # Use the "chroma_db" subfolder of the download when it exists; otherwise
    # treat the download directory itself as the DB.
    if os.path.exists("./hf_data/chroma_db"):
        src = "./hf_data/chroma_db"
    else:
        src = "./hf_data"
    shutil.copytree(src, DB_LOCAL_PATH)
    print("β DB λ€μ΄λ‘λ μλ£")

# Load the embedding model and open the persisted Chroma store with it.
print("π μλ² λ© λͺ¨λΈ λ‘λ© μ€...")
embeddings = HuggingFaceEmbeddings(model_name="jhgan/ko-sroberta-multitask")
db = Chroma(persist_directory=DB_LOCAL_PATH, embedding_function=embeddings)
# Reads the count through the wrapper's private `_collection` attribute.
print(f"β DB λ‘λ μλ£ β μ²ν¬ μ: {db._collection.count()}")
# Request payload for a retrieval call: the query text plus a result count.
class QueryRequest(BaseModel):
    # Text handed to the similarity search.
    query: str

    # Number of documents to ask the search for; defaults to 5 when omitted.
    k: int = 5
# NOTE(review): no route decorator was visible on this handler, so it was never
# registered on `app`. The "/" path is inferred from the function name --
# confirm against the clients that call this service.
@app.get("/")
def root() -> dict:
    """Report that the service is up and how many chunks the store holds.

    Returns:
        A dict with ``"status"`` set to ``"ok"`` and ``"chunks"`` set to the
        current item count of the Chroma collection.
    """
    # The count is read through the wrapper's private `_collection` attribute.
    return {"status": "ok", "chunks": db._collection.count()}
# NOTE(review): no route decorator was visible on this handler, so it was never
# registered on `app`. The "/health" path is inferred from the function name --
# confirm against whatever probes this service.
@app.get("/health")
def health() -> dict:
    """Health-check endpoint; returns the same payload shape as ``root``.

    Returns:
        A dict with ``"status"`` set to ``"ok"`` and ``"chunks"`` set to the
        current item count of the Chroma collection.
    """
    # The count is read through the wrapper's private `_collection` attribute.
    return {"status": "ok", "chunks": db._collection.count()}
# NOTE(review): no route decorator was visible on this handler, so it was never
# registered on `app`. POST is used because the parameter is a request-body
# model; the "/retrieve" path is inferred from the function name -- confirm
# against the clients that call this service.
@app.post("/retrieve")
def retrieve(req: QueryRequest) -> dict:
    """Run a similarity search for the query and return the matching chunks.

    Args:
        req: Request body carrying the query text and the number of results
            to ask for.

    Returns:
        A dict with a ``"documents"`` list. Each entry holds the chunk
        ``"text"`` plus its ``"source"`` and ``"region"`` metadata; a
        placeholder string is used when a metadata key is absent.
    """
    matches = db.similarity_search(req.query, k=req.k)
    return {
        "documents": [
            {
                "text": doc.page_content,
                # Fallback strings are runtime output and kept verbatim; they
                # look mis-encoded -- presumably Korean, TODO confirm.
                "source": doc.metadata.get("source", "μ μ μμ"),
                "region": doc.metadata.get("region", "μ μ μμ"),
            }
            for doc in matches
        ]
    }