# NOTE: copied from a Hugging Face Spaces file view (status: Running; file size: 1,945 bytes).
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma
from huggingface_hub import snapshot_download
import os
import shutil
# Application instance; every route in this module is registered on it.
app = FastAPI()

# CORS: allow every origin (the original note mentions Claude artifacts as one
# of the intended callers). Any method and any header are accepted, while
# credentialed requests stay disabled.
app.add_middleware(
    CORSMiddleware,
    allow_credentials=False,
    allow_origins=["*"],
    allow_headers=["*"],
    allow_methods=["*"],
)
DB_LOCAL_PATH = "./chroma_db"
if not os.path.exists(DB_LOCAL_PATH):
print("π HuggingFaceμμ DB λ€μ΄λ‘λ μ€...")
snapshot_download(
repo_id=os.environ["HF_REPO_ID"],
repo_type="dataset",
token=os.environ["HF_TOKEN"],
local_dir="./hf_data",
)
#shutil.copytree("./hf_data/chroma_db", DB_LOCAL_PATH)
src = "./hf_data/chroma_db" if os.path.exists("./hf_data/chroma_db") else "./hf_data"
shutil.copytree(src, DB_LOCAL_PATH)
print("β
DB λ€μ΄λ‘λ μλ£")
# Load the embedding model and open the persisted Chroma store once, at import
# time, so that every request handler shares the same `db` object.
# Message: "Loading embedding model..."
# NOTE(review): the emoji that originally prefixed this message could not be
# recovered from the garbled source, so only the text is restored.
print("임베딩 모델 로딩 중...")
embeddings = HuggingFaceEmbeddings(model_name="jhgan/ko-sroberta-multitask")
db = Chroma(persist_directory=DB_LOCAL_PATH, embedding_function=embeddings)
# Message: "DB load complete — chunk count: N". The count is read through the
# store's private `_collection` attribute.
print(f"✅ DB 로드 완료 — 청크 수: {db._collection.count()}")
class QueryRequest(BaseModel):
    """JSON body accepted by the POST /retrieve endpoint."""

    # Text used for the similarity search.
    query: str
    # Number of documents to request from the store; defaults to 5.
    k: int = 5
@app.get("/")
def root():
    """Return a status marker together with the store's current chunk count."""
    # The count is read through the store's private `_collection` attribute.
    chunk_total = db._collection.count()
    return {"status": "ok", "chunks": chunk_total}
@app.get("/health")
def health():
    """Health endpoint: report "ok" plus the number of chunks in the store."""
    payload = {"status": "ok"}
    # The count is read through the store's private `_collection` attribute.
    payload["chunks"] = db._collection.count()
    return payload
@app.post("/retrieve")
def retrieve(req: QueryRequest):
    """Return the stored chunks most similar to the query text.

    Args:
        req: Request body carrying the query string and the number of
            results ``k`` to fetch.

    Returns:
        A dict with a single ``"documents"`` list. Each entry holds the chunk
        ``text`` plus its ``source`` and ``region`` metadata; a missing
        metadata key falls back to "알 수 없음" ("unknown").

    Raises:
        HTTPException: 422 when ``k`` is smaller than 1.
    """
    # Imported locally so this handler does not depend on an edit to the
    # file-level import block.
    from fastapi import HTTPException

    # Reject a non-positive k here with a clear client error instead of
    # letting the vector store fail on it as an unhandled server error.
    if req.k < 1:
        raise HTTPException(status_code=422, detail="k must be at least 1")

    docs = db.similarity_search(req.query, k=req.k)
    return {
        "documents": [
            {
                "text": doc.page_content,
                "source": doc.metadata.get("source", "알 수 없음"),
                "region": doc.metadata.get("region", "알 수 없음"),
            }
            for doc in docs
        ]
    }