File size: 2,053 Bytes
df5e016
3753c9d
 
df5e016
3753c9d
 
 
 
 
df5e016
3753c9d
 
df5e016
3753c9d
 
 
 
df5e016
3753c9d
 
df5e016
3753c9d
 
 
 
 
 
 
 
 
 
df5e016
3753c9d
df5e016
3753c9d
 
 
 
 
df5e016
3753c9d
 
 
df5e016
3753c9d
 
df5e016
 
3753c9d
 
df5e016
3753c9d
 
 
df5e016
3753c9d
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import asyncio
import os
import sys

from sqlalchemy import select, update

# Add the project root (the parent of this script's directory) to sys.path so the `app` package can be imported.
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from app.core.ai_client import get_ai_client
from app.core.database import AsyncSessionLocal
from app.models import IndustryCategory


async def sync_embeddings() -> None:
    """Recompute and store the embedding of every ``IndustryCategory`` row.

    Steps, all performed inside a single ``AsyncSessionLocal`` session:

    1. Set ``embedding`` to ``None`` on every row and commit.
    2. Load all rows.
    3. For each row, await ``ai_client.embed_text`` on its ``name`` and
       assign the result to ``embedding``; commit after every 100 successful
       rows and once more at the end.

    A row whose embedding call raises is reported on stdout and skipped, so
    its ``embedding`` stays ``None``. Database errors (the reset, the query,
    or any commit) are not caught and propagate to the caller.
    """
    print("๐Ÿš€ ๋กœ์ปฌ ๋ชจ๋ธ ๊ธฐ๋ฐ˜ ์ดˆ๊ณ ์† ์ž„๋ฒ ๋”ฉ ๋™๊ธฐํ™” ์‹œ์ž‘...")

    # NOTE(review): the log messages describe a "local model", but the client
    # requested here is the one registered as "gemini" — confirm which backend
    # get_ai_client actually returns for this key.
    ai_client = get_ai_client("gemini")

    async with AsyncSessionLocal() as db:
        # 1. Clear existing embeddings so every row is rewritten from scratch.
        print("๐Ÿงน ๊ธฐ์กด ์ž„๋ฒ ๋”ฉ ๋ฐ์ดํ„ฐ ์ดˆ๊ธฐํ™” ์ค‘...")
        await db.execute(update(IndustryCategory).values(embedding=None))
        await db.commit()

        # 2. Load every industry row.
        result = await db.execute(select(IndustryCategory))
        industries = result.scalars().all()
        total = len(industries)

        print(f"๐Ÿ“ฆ ์ด {total}๊ฐœ์˜ ์—…์ข…์„ ๋กœ์ปฌ ๋ชจ๋ธ๋กœ ์ฒ˜๋ฆฌํ•ฉ๋‹ˆ๋‹ค.")

        # Read each name now, while the rows are freshly loaded. If the session
        # expires instances on commit, reading ``ind.name`` after a batch
        # commit would require a lazy load, which an AsyncSession cannot
        # perform implicitly.
        named_rows = [(ind, ind.name) for ind in industries]

        count = 0
        for ind, name in named_rows:
            # 3. Only the embedding call is best-effort: a failure for one
            #    name is reported and that row is skipped.
            try:
                vector = await ai_client.embed_text(name)
            except Exception as exc:
                print(f"โŒ ์˜ค๋ฅ˜ ๋ฐœ์ƒ ({name}): {exc}")
                continue

            # 4. Stage the new vector on the ORM object; it is written to the
            #    database by the next commit.
            ind.embedding = vector
            count += 1

            if count % 100 == 0:
                print(f"โณ ์ง„ํ–‰ ์ค‘... ({count}/{total})")
                # Persist in batches of 100. This sits outside the try block
                # on purpose: a failed commit is a database problem, not a
                # per-item embedding error, and must not be swallowed.
                await db.commit()

        # Flush whatever is left from the last partial batch.
        await db.commit()
        print(f"โœจ ๋™๊ธฐํ™” ์™„๋ฃŒ! ์ด {count}๊ฐœ์˜ ์—…์ข…์ด ๋กœ์ปฌ ๋ชจ๋ธ๋กœ ์—…๋ฐ์ดํŠธ๋˜์—ˆ์Šต๋‹ˆ๋‹ค.")


# Script entry point: when this file is executed directly (not imported),
# run the sync_embeddings coroutine to completion on a new event loop.
if __name__ == "__main__":
    asyncio.run(sync_embeddings())