import asyncio
import os
import sys
from sqlalchemy import select, update
# Add the project root directory to sys.path (needed to import the app package)
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from app.core.ai_client import get_ai_client
from app.core.database import AsyncSessionLocal
from app.models import IndustryCategory
async def sync_embeddings():
    """Regenerate the embedding of every ``IndustryCategory`` row.

    Steps:
      1. Set ``embedding`` to NULL on all rows and commit, so that every
         vector is rebuilt from scratch.
      2. Load every ``IndustryCategory``.
      3. Embed each row's ``name`` with ``ai_client.embed_text`` and store the
         vector on the row, committing after every 100 successful rows and
         once more at the end.

    A failure to embed a single row is reported on stdout and that row is
    skipped (its embedding stays NULL). Database errors are not swallowed;
    they propagate to the caller.

    NOTE(review): the log messages speak of a "local model", but the client
    is requested with the key "gemini" -- confirm which backend
    ``get_ai_client`` returns for that key.
    """
    print("🚀 로컬 모델 기반 초고속 임베딩 동기화 시작...")

    # Obtaining the client may be slow on the first run (model download).
    ai_client = get_ai_client("gemini")

    async with AsyncSessionLocal() as db:
        # 1. Clear the existing embeddings: all of them are regenerated below.
        print("🧹 기존 임베딩 데이터 초기화 중...")
        await db.execute(update(IndustryCategory).values(embedding=None))
        await db.commit()

        # 2. Load every industry category.
        result = await db.execute(select(IndustryCategory))
        industries = result.scalars().all()
        total = len(industries)
        print(f"📦 총 {total}개의 업종을 로컬 모델로 처리합니다.")

        # Read the names once, before any commit inside the loop. A commit can
        # expire loaded attributes (SQLAlchemy's expire_on_commit default), and
        # an expired attribute cannot be lazily reloaded from async code.
        # NOTE(review): harmless if AsyncSessionLocal already disables
        # expire_on_commit -- its configuration is not visible here.
        named_rows = [(ind, ind.name) for ind in industries]

        count = 0
        for ind, name in named_rows:
            # 3. Generate the embedding. Only this call is best-effort: one
            #    bad row must not abort the whole run.
            try:
                vector = await ai_client.embed_text(name)
            except Exception as e:
                print(f"❌ 오류 발생 ({name}): {e}")
                continue

            # 4. Stage the update on the ORM instance.
            ind.embedding = vector
            count += 1

            if count % 100 == 0:
                print(f"⏳ 진행 중... ({count}/{total})")
                await db.commit()  # persist in batches of 100

        # Persist whatever is left from the last partial batch.
        await db.commit()

    print(f"✨ 동기화 완료! 총 {count}개의 업종이 로컬 모델로 업데이트되었습니다.")
if __name__ == "__main__":
    # Script entry point: drive the sync coroutine to completion.
    asyncio.run(sync_embeddings())
|