nexus-ai-api / scripts /sync_industry_embeddings.py
github-actions[bot]
Auto Deploy from Monorepo: Merge pull request #72 from human13th2team/dev
df5e016
import asyncio
import os
import sys
from sqlalchemy import select, update
# Add the project root directory to sys.path (so the `app` package can be imported)
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from app.core.ai_client import get_ai_client
from app.core.database import AsyncSessionLocal
from app.models import IndustryCategory
async def sync_embeddings():
    """Regenerate the ``embedding`` of every ``IndustryCategory`` row.

    Everything runs inside a single ``AsyncSessionLocal`` session:

    1. Set ``embedding`` to NULL on every row and commit.
    2. Load every ``IndustryCategory`` row.
    3. For each row, await ``ai_client.embed_text(name)`` and store the result
       in ``embedding``. A commit is issued after every 100 stored vectors
       and once more after the loop.

    Error handling:
        * A failure while embedding a single row is printed and that row is
          skipped; its embedding stays NULL.
        * A failure of a periodic commit is printed and the session is rolled
          back so the remaining rows can still be processed. The rows of the
          lost batch are not retried and are removed from the reported count.
        * A failure of the final commit propagates to the caller.

    NOTE(review): the log messages talk about a "local model", but the client
    is requested with the key "gemini" — confirm which backend
    ``get_ai_client`` actually returns for that key.

    Returns:
        None. Progress and errors are reported with ``print``.
    """
    print("๐Ÿš€ ๋กœ์ปฌ ๋ชจ๋ธ ๊ธฐ๋ฐ˜ ์ดˆ๊ณ ์† ์ž„๋ฒ ๋”ฉ ๋™๊ธฐํ™” ์‹œ์ž‘...")

    # Obtain the embedding client (the original note says the first run may
    # spend time downloading a model).
    ai_client = get_ai_client("gemini")

    async with AsyncSessionLocal() as db:
        # 1. Clear existing embeddings (the original note says the model
        #    changed, so everything has to be rewritten).
        print("๐Ÿงน ๊ธฐ์กด ์ž„๋ฒ ๋”ฉ ๋ฐ์ดํ„ฐ ์ดˆ๊ธฐํ™” ์ค‘...")
        await db.execute(update(IndustryCategory).values(embedding=None))
        await db.commit()

        # 2. Load every industry row.
        stmt = select(IndustryCategory)
        result = await db.execute(stmt)
        industries = result.scalars().all()
        print(f"๐Ÿ“ฆ ์ด {len(industries)}๊ฐœ์˜ ์—…์ข…์„ ๋กœ์ปฌ ๋ชจ๋ธ๋กœ ์ฒ˜๋ฆฌํ•ฉ๋‹ˆ๋‹ค.")

        # Read the names while the rows are freshly loaded. A rollback (and a
        # commit, when the session factory uses expire_on_commit=True — not
        # visible from here) expires ORM instances, and reading an expired
        # attribute from coroutine code would trigger an implicit lazy load
        # that AsyncSession does not allow.
        named_rows = [(ind, ind.name) for ind in industries]

        count = 0       # vectors assigned and not lost to a rollback
        committed = 0   # value of `count` at the last successful commit
        for ind, name in named_rows:
            try:
                # 3. Generate the embedding for this row.
                vector = await ai_client.embed_text(name)
                # 4. Stage the update on the ORM object.
                ind.embedding = vector
            except Exception as e:
                # Best effort: report and move on. The session is untouched
                # here, so already-staged rows of this batch are kept.
                print(f"โŒ ์˜ค๋ฅ˜ ๋ฐœ์ƒ ({name}): {str(e)}")
                continue

            count += 1
            if count % 100 != 0:
                continue

            print(f"โณ ์ง„ํ–‰ ์ค‘... ({count}/{len(industries)})")
            try:
                await db.commit()  # persist in batches of 100
            except Exception as e:
                print(f"โŒ ์˜ค๋ฅ˜ ๋ฐœ์ƒ ({name}): {str(e)}")
                # Without a rollback the session stays in a failed state and
                # every later commit would fail as well.
                await db.rollback()
                # The staged batch was discarded; do not report it as saved.
                count = committed
            else:
                committed = count

        # Persist the trailing partial batch; an error here propagates.
        await db.commit()
        print(f"โœจ ๋™๊ธฐํ™” ์™„๋ฃŒ! ์ด {count}๊ฐœ์˜ ์—…์ข…์ด ๋กœ์ปฌ ๋ชจ๋ธ๋กœ ์—…๋ฐ์ดํŠธ๋˜์—ˆ์Šต๋‹ˆ๋‹ค.")
if __name__ == "__main__":
    # Executed as a script: build the coroutine and run it to completion
    # with asyncio.run.
    sync_job = sync_embeddings()
    asyncio.run(sync_job)