gbrabbit committed on
Commit
130525d
ยท
1 Parent(s): b386020

Auto commit at 2025-08-19 20:43:11

Browse files
0.41.0 ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Requirement already satisfied: peft in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (0.8.0)
2
+ Requirement already satisfied: bitsandbytes in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (0.46.1)
3
+ Requirement already satisfied: numpy>=1.17 in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from peft) (2.2.6)
4
+ Requirement already satisfied: packaging>=20.0 in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from peft) (25.0)
5
+ Requirement already satisfied: psutil in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from peft) (7.0.0)
6
+ Requirement already satisfied: pyyaml in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from peft) (6.0.2)
7
+ Requirement already satisfied: torch>=1.13.0 in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from peft) (2.7.1)
8
+ Requirement already satisfied: transformers in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from peft) (4.54.1)
9
+ Requirement already satisfied: tqdm in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from peft) (4.67.1)
10
+ Requirement already satisfied: accelerate>=0.21.0 in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from peft) (1.9.0)
11
+ Requirement already satisfied: safetensors in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from peft) (0.5.3)
12
+ Requirement already satisfied: huggingface-hub>=0.17.0 in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from peft) (0.34.3)
13
+ Requirement already satisfied: filelock in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from torch>=1.13.0->peft) (3.18.0)
14
+ Requirement already satisfied: typing-extensions>=4.10.0 in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from torch>=1.13.0->peft) (4.14.1)
15
+ Requirement already satisfied: sympy>=1.13.3 in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from torch>=1.13.0->peft) (1.14.0)
16
+ Requirement already satisfied: networkx in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from torch>=1.13.0->peft) (3.5)
17
+ Requirement already satisfied: jinja2 in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from torch>=1.13.0->peft) (3.1.6)
18
+ Requirement already satisfied: fsspec in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from torch>=1.13.0->peft) (2025.7.0)
19
+ Requirement already satisfied: requests in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from huggingface-hub>=0.17.0->peft) (2.32.4)
20
+ Requirement already satisfied: mpmath<1.4,>=1.1.0 in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from sympy>=1.13.3->torch>=1.13.0->peft) (1.3.0)
21
+ Requirement already satisfied: colorama in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from tqdm->peft) (0.4.6)
22
+ Requirement already satisfied: MarkupSafe>=2.0 in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from jinja2->torch>=1.13.0->peft) (3.0.2)
23
+ Requirement already satisfied: charset_normalizer<4,>=2 in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from requests->huggingface-hub>=0.17.0->peft) (3.4.2)
24
+ Requirement already satisfied: idna<4,>=2.5 in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from requests->huggingface-hub>=0.17.0->peft) (3.10)
25
+ Requirement already satisfied: urllib3<3,>=1.21.1 in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from requests->huggingface-hub>=0.17.0->peft) (2.5.0)
26
+ Requirement already satisfied: certifi>=2017.4.17 in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from requests->huggingface-hub>=0.17.0->peft) (2025.8.3)
27
+ Requirement already satisfied: regex!=2019.12.17 in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from transformers->peft) (2025.7.34)
28
+ Requirement already satisfied: tokenizers<0.22,>=0.21 in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from transformers->peft) (0.21.4)
0.7.0 ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Requirement already satisfied: peft in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (0.8.0)
2
+ Requirement already satisfied: numpy>=1.17 in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from peft) (2.2.6)
3
+ Requirement already satisfied: packaging>=20.0 in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from peft) (25.0)
4
+ Requirement already satisfied: psutil in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from peft) (7.0.0)
5
+ Requirement already satisfied: pyyaml in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from peft) (6.0.2)
6
+ Requirement already satisfied: torch>=1.13.0 in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from peft) (2.7.1)
7
+ Requirement already satisfied: transformers in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from peft) (4.54.1)
8
+ Requirement already satisfied: tqdm in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from peft) (4.67.1)
9
+ Requirement already satisfied: accelerate>=0.21.0 in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from peft) (1.9.0)
10
+ Requirement already satisfied: safetensors in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from peft) (0.5.3)
11
+ Requirement already satisfied: huggingface-hub>=0.17.0 in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from peft) (0.34.3)
12
+ Requirement already satisfied: filelock in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from huggingface-hub>=0.17.0->peft) (3.18.0)
13
+ Requirement already satisfied: fsspec>=2023.5.0 in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from huggingface-hub>=0.17.0->peft) (2025.7.0)
14
+ Requirement already satisfied: requests in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from huggingface-hub>=0.17.0->peft) (2.32.4)
15
+ Requirement already satisfied: typing-extensions>=3.7.4.3 in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from huggingface-hub>=0.17.0->peft) (4.14.1)
16
+ Requirement already satisfied: sympy>=1.13.3 in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from torch>=1.13.0->peft) (1.14.0)
17
+ Requirement already satisfied: networkx in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from torch>=1.13.0->peft) (3.5)
18
+ Requirement already satisfied: jinja2 in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from torch>=1.13.0->peft) (3.1.6)
19
+ Requirement already satisfied: mpmath<1.4,>=1.1.0 in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from sympy>=1.13.3->torch>=1.13.0->peft) (1.3.0)
20
+ Requirement already satisfied: colorama in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from tqdm->peft) (0.4.6)
21
+ Requirement already satisfied: MarkupSafe>=2.0 in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from jinja2->torch>=1.13.0->peft) (3.0.2)
22
+ Requirement already satisfied: charset_normalizer<4,>=2 in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from requests->huggingface-hub>=0.17.0->peft) (3.4.2)
23
+ Requirement already satisfied: idna<4,>=2.5 in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from requests->huggingface-hub>=0.17.0->peft) (3.10)
24
+ Requirement already satisfied: urllib3<3,>=1.21.1 in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from requests->huggingface-hub>=0.17.0->peft) (2.5.0)
25
+ Requirement already satisfied: certifi>=2017.4.17 in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from requests->huggingface-hub>=0.17.0->peft) (2025.8.3)
26
+ Requirement already satisfied: regex!=2019.12.17 in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from transformers->peft) (2025.7.34)
27
+ Requirement already satisfied: tokenizers<0.22,>=0.21 in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from transformers->peft) (0.21.4)
README_CONTEXT_LORA.md ADDED
@@ -0,0 +1,331 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Lily LLM - ์ปจํ…์ŠคํŠธ ๊ด€๋ฆฌ ๋ฐ LoRA/QLoRA ์‹œ์Šคํ…œ
2
+
3
+ ## ๐Ÿ“‹ ๊ฐœ์š”
4
+
5
+ Lily LLM ํ”„๋กœ์ ํŠธ์— ๋‹จ๊ธฐ ๊ธฐ์–ต(์ปจํ…์ŠคํŠธ ์ฐฝ) ๊ธฐ๋Šฅ๊ณผ LoRA/QLoRA ์ง€์›์„ ์ถ”๊ฐ€ํ•˜์—ฌ ๋”์šฑ ๊ฐ•๋ ฅํ•˜๊ณ  ํšจ์œจ์ ์ธ AI ๋Œ€ํ™” ์‹œ์Šคํ…œ์„ ๊ตฌ์ถ•ํ–ˆ์Šต๋‹ˆ๋‹ค.
6
+
7
+ ## ๐Ÿš€ ์ฃผ์š” ๊ธฐ๋Šฅ
8
+
9
+ ### 1. ์ปจํ…์ŠคํŠธ ๊ด€๋ฆฌ ์‹œ์Šคํ…œ (Context Management)
10
+
11
+ #### ๐Ÿ”ง ํ•ต์‹ฌ ๊ธฐ๋Šฅ
12
+ - **๋Œ€ํ™” ํžˆ์Šคํ† ๋ฆฌ ๊ด€๋ฆฌ**: ์‚ฌ์šฉ์ž์™€ AI ๊ฐ„์˜ ๋Œ€ํ™”๋ฅผ ์ˆœ์ฐจ์ ์œผ๋กœ ์ €์žฅ
13
+ - **๋ฉ”๋ชจ๋ฆฌ ์ตœ์ ํ™”**: ์„ค์ •๋œ ์ œํ•œ์— ๋„๋‹ฌํ•˜๋ฉด ์ž๋™์œผ๋กœ ์ปจํ…์ŠคํŠธ ์••์ถ•
14
+ - **์„ธ์…˜ ๊ด€๋ฆฌ**: ์—ฌ๋Ÿฌ ๋Œ€ํ™” ์„ธ์…˜์„ ๋…๋ฆฝ์ ์œผ๋กœ ๊ด€๋ฆฌ
15
+ - **์ปจํ…์ŠคํŠธ ๊ฒ€์ƒ‰**: ์ €์žฅ๋œ ๋Œ€ํ™” ๋‚ด์šฉ์—์„œ ํŠน์ • ์ •๋ณด ๊ฒ€์ƒ‰
16
+
17
+ #### ๐Ÿ“Š ์ปจํ…์ŠคํŠธ ์ „๋žต
18
+ - **Sliding Window**: ์ตœ๊ทผ ๋ฉ”์‹œ์ง€ ์šฐ์„  ์œ ์ง€
19
+ - **Priority Keep**: ์‹œ์Šคํ…œ ํ”„๋กฌํ”„ํŠธ์™€ ์ตœ๊ทผ ๋ฉ”์‹œ์ง€ ์šฐ์„ 
20
+ - **Circular Buffer**: ์ˆœํ™˜ ๋ฐฉ์‹์œผ๋กœ ๋ฉ”๋ชจ๋ฆฌ ๊ด€๋ฆฌ
21
+
22
+ #### ๐Ÿ’พ ๋ฐ์ดํ„ฐ ๊ด€๋ฆฌ
23
+ - **๋‚ด๋ณด๋‚ด๊ธฐ/๊ฐ€์ ธ์˜ค๊ธฐ**: JSON ํ˜•์‹์œผ๋กœ ์ปจํ…์ŠคํŠธ ์ €์žฅ ๋ฐ ๋ณต์›
24
+ - **๋ฉ”ํƒ€๋ฐ์ดํ„ฐ ์ง€์›**: ๊ฐ ๋ฉ”์‹œ์ง€์— ์ถ”๊ฐ€ ์ •๋ณด ์ฒจ๋ถ€ ๊ฐ€๋Šฅ
25
+ - **ํ†ต๊ณ„ ์ •๋ณด**: ๋ฉ”๋ชจ๋ฆฌ ์‚ฌ์šฉ๋Ÿ‰ ๋ฐ ํšจ์œจ์„ฑ ์ง€ํ‘œ ์ œ๊ณต
26
+
27
+ ### 2. LoRA/QLoRA ์ง€์› ์‹œ์Šคํ…œ
28
+
29
+ #### ๐Ÿ”— LoRA (Low-Rank Adaptation)
30
+ - **ํšจ์œจ์ ์ธ ํŒŒ์ธํŠœ๋‹**: ์ „์ฒด ๋ชจ๋ธ ๋Œ€์‹  ์ผ๋ถ€ ํŒŒ๋ผ๋ฏธํ„ฐ๋งŒ ํ›ˆ๋ จ
31
+ - **๋ฉ”๋ชจ๋ฆฌ ์ ˆ์•ฝ**: GPU ๋ฉ”๋ชจ๋ฆฌ ์‚ฌ์šฉ๋Ÿ‰ ๋Œ€ํญ ๊ฐ์†Œ
32
+ - **๋น ๋ฅธ ์–ด๋Œ‘ํ„ฐ ์ „ํ™˜**: ์—ฌ๋Ÿฌ ์ž‘์—…๋ณ„ ์–ด๋Œ‘ํ„ฐ๋ฅผ ๋น ๋ฅด๊ฒŒ ๊ต์ฒด
33
+
34
+ #### ๐Ÿ“ˆ QLoRA (Quantized LoRA)
35
+ - **4๋น„ํŠธ ์–‘์žํ™”**: ๋ชจ๋ธ ํฌ๊ธฐ์™€ ๋ฉ”๋ชจ๋ฆฌ ์‚ฌ์šฉ๋Ÿ‰ ์ถ”๊ฐ€ ๊ฐ์†Œ
36
+ - **๊ณ ํ’ˆ์งˆ ํ›ˆ๋ จ**: ์–‘์žํ™”๋œ ๋ชจ๋ธ์—์„œ๋„ ๋†’์€ ํ’ˆ์งˆ์˜ ํ›ˆ๋ จ ๊ฐ€๋Šฅ
37
+ - **ํ•˜๋“œ์›จ์–ด ํšจ์œจ์„ฑ**: ์ €์‚ฌ์–‘ GPU์—์„œ๋„ ํ›ˆ๋ จ ๊ฐ€๋Šฅ
38
+
39
+ #### ๐ŸŽฏ ์ง€์› ๋ชจ๋ธ
40
+ - **Causal Language Models**: GPT, LLaMA, Kanana ๋“ฑ
41
+ - **Sequence-to-Sequence**: T5, BART ๋“ฑ
42
+ - **Classification Models**: BERT, RoBERTa ๋“ฑ
43
+
44
+ ## ๐Ÿ› ๏ธ ์„ค์น˜ ๋ฐ ์„ค์ •
45
+
46
+ ### 1. ์˜์กด์„ฑ ์„ค์น˜
47
+
48
+ ```bash
49
+ pip install -r requirements.txt
50
+ ```
51
+
52
+ ### 2. ์ถ”๊ฐ€ ํŒจํ‚ค์ง€ ์„ค์น˜
53
+
54
+ ```bash
55
+ # LoRA/QLoRA ์ง€์›
56
+ pip install peft>=0.7.0
57
+ pip install bitsandbytes>=0.41.0
58
+
59
+ # ์„ ํƒ์ : ๋” ๋‚˜์€ ์„ฑ๋Šฅ์„ ์œ„ํ•œ ํŒจํ‚ค์ง€
60
+ pip install accelerate
61
+ pip install transformers[torch]
62
+ ```
63
+
64
+ ### 3. ํ™˜๊ฒฝ ๋ณ€์ˆ˜ ์„ค์ •
65
+
66
+ ```bash
67
+ # GPU ์‚ฌ์šฉ ์„ค์ •
68
+ export CUDA_VISIBLE_DEVICES=0
69
+
70
+ # ๋ฉ”๋ชจ๋ฆฌ ์ตœ์ ํ™”
71
+ export PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:128
72
+ ```
73
+
74
+ ## ๐Ÿ“– ์‚ฌ์šฉ๋ฒ•
75
+
76
+ ### 1. ์ปจํ…์ŠคํŠธ ๊ด€๋ฆฌ
77
+
78
+ #### ๊ธฐ๋ณธ ์‚ฌ์šฉ๋ฒ•
79
+
80
+ ```python
81
+ import requests
82
+
83
+ # ์‹œ์Šคํ…œ ํ”„๋กฌํ”„ํŠธ ์„ค์ •
84
+ response = requests.post("http://localhost:8001/context/set-system-prompt",
85
+ data={"prompt": "๋‹น์‹ ์€ ํ•œ๊ตญ์–ด AI ์–ด์‹œ์Šคํ„ดํŠธ์ž…๋‹ˆ๋‹ค."})
86
+
87
+ # ์‚ฌ์šฉ์ž ๋ฉ”์‹œ์ง€ ์ถ”๊ฐ€
88
+ response = requests.post("http://localhost:8001/context/add-message",
89
+ data={
90
+ "role": "user",
91
+ "content": "์•ˆ๋…•ํ•˜์„ธ์š”!",
92
+ "metadata": '{"session_id": "session_1"}'
93
+ })
94
+
95
+ # ์–ด์‹œ์Šคํ„ดํŠธ ์‘๋‹ต ์ถ”๊ฐ€
96
+ response = requests.post("http://localhost:8001/context/add-message",
97
+ data={
98
+ "role": "assistant",
99
+ "content": "์•ˆ๋…•ํ•˜์„ธ์š”! ๋ฌด์—‡์„ ๋„์™€๋“œ๋ฆด๊นŒ์š”?",
100
+ "metadata": '{"session_id": "session_1"}'
101
+ })
102
+
103
+ # ์ปจํ…์ŠคํŠธ ์กฐํšŒ
104
+ response = requests.get("http://localhost:8001/context/get")
105
+ context = response.json()["context"]
106
+ ```
107
+
108
+ #### ๊ณ ๊ธ‰ ๊ธฐ๋Šฅ
109
+
110
+ ```python
111
+ # ์ปจํ…์ŠคํŠธ ๊ฒ€์ƒ‰
112
+ response = requests.get("http://localhost:8001/context/search?query=๋‚ ์”จ&max_results=5")
113
+
114
+ # ์ปจํ…์ŠคํŠธ ๋‚ด๋ณด๋‚ด๊ธฐ
115
+ response = requests.post("http://localhost:8001/context/export",
116
+ data={"file_path": "my_context.json"})
117
+
118
+ # ์ปจํ…์ŠคํŠธ ๊ฐ€์ ธ์˜ค๊ธฐ
119
+ response = requests.post("http://localhost:8001/context/import",
120
+ data={"file_path": "my_context.json"})
121
+
122
+ # ์ปจํ…์ŠคํŠธ ํ†ต๊ณ„
123
+ response = requests.get("http://localhost:8001/context/summary")
124
+ ```
125
+
126
+ ### 2. LoRA/QLoRA ์‚ฌ์šฉ
127
+
128
+ #### ๊ธฐ๋ณธ ๋ชจ๋ธ ๋กœ๋“œ
129
+
130
+ ```python
131
+ # ๊ธฐ๋ณธ ๋ชจ๋ธ ๋กœ๋“œ
132
+ response = requests.post("http://localhost:8001/lora/load-base-model",
133
+ data={
134
+ "model_path": "/path/to/your/model",
135
+ "model_type": "causal_lm"
136
+ })
137
+ ```
138
+
139
+ #### LoRA ์„ค์ • ์ƒ์„ฑ
140
+
141
+ ```python
142
+ # LoRA ์„ค์ • ์ƒ์„ฑ
143
+ response = requests.post("http://localhost:8001/lora/create-config",
144
+ data={
145
+ "r": 16, # LoRA ๋žญํฌ
146
+ "lora_alpha": 32, # LoRA ์•ŒํŒŒ
147
+ "target_modules": "q_proj,v_proj,k_proj,o_proj", # ํƒ€๊ฒŸ ๋ชจ๋“ˆ
148
+ "lora_dropout": 0.1, # ๋“œ๋กญ์•„์›ƒ
149
+ "bias": "none", # ๋ฐ”์ด์–ด์Šค ์ฒ˜๋ฆฌ
150
+ "task_type": "CAUSAL_LM" # ์ž‘์—… ํƒ€์ž…
151
+ })
152
+ ```
153
+
154
+ #### ์–ด๋Œ‘ํ„ฐ ์ ์šฉ ๋ฐ ์‚ฌ์šฉ
155
+
156
+ ```python
157
+ # LoRA ์–ด๋Œ‘ํ„ฐ ์ ์šฉ
158
+ response = requests.post("http://localhost:8001/lora/apply",
159
+ data={"adapter_name": "my_adapter"})
160
+
161
+ # LoRA ๋ชจ๋ธ๋กœ ํ…์ŠคํŠธ ์ƒ์„ฑ
162
+ response = requests.post("http://localhost:8001/lora/generate",
163
+ data={
164
+ "prompt": "์•ˆ๋…•ํ•˜์„ธ์š”!",
165
+ "max_length": 100,
166
+ "temperature": 0.7
167
+ })
168
+
169
+ # ์–ด๋Œ‘ํ„ฐ ์ €์žฅ
170
+ response = requests.post("http://localhost:8001/lora/save-adapter",
171
+ data={"adapter_name": "my_adapter"})
172
+ ```
173
+
174
+ ### 3. ํ†ตํ•ฉ ์‚ฌ์šฉ (์ปจํ…์ŠคํŠธ + LoRA)
175
+
176
+ ```python
177
+ # ์ปจํ…์ŠคํŠธ๋ฅผ ์‚ฌ์šฉํ•œ ํ…์ŠคํŠธ ์ƒ์„ฑ
178
+ response = requests.post("http://localhost:8001/generate",
179
+ data={
180
+ "prompt": "์ด์ „ ๋Œ€ํ™”๋ฅผ ์ฐธ๊ณ ํ•ด์„œ ๋‹ต๋ณ€ํ•ด์ฃผ์„ธ์š”.",
181
+ "use_context": "true",
182
+ "session_id": "session_1"
183
+ })
184
+ ```
185
+
186
+ ## ๐Ÿ” API ์—”๋“œํฌ์ธํŠธ
187
+
188
+ ### ์ปจํ…์ŠคํŠธ ๊ด€๋ฆฌ
189
+
190
+ | ๋ฉ”์„œ๋“œ | ์—”๋“œํฌ์ธํŠธ | ์„ค๋ช… |
191
+ |--------|------------|------|
192
+ | POST | `/context/set-system-prompt` | ์‹œ์Šคํ…œ ํ”„๋กฌํ”„ํŠธ ์„ค์ • |
193
+ | POST | `/context/add-message` | ๋ฉ”์‹œ์ง€ ์ถ”๊ฐ€ |
194
+ | GET | `/context/get` | ์ปจํ…์ŠคํŠธ ์กฐํšŒ |
195
+ | GET | `/context/summary` | ์ปจํ…์ŠคํŠธ ์š”์•ฝ |
196
+ | POST | `/context/clear` | ์ปจํ…์ŠคํŠธ ์ดˆ๊ธฐํ™” |
197
+ | DELETE | `/context/message/{message_id}` | ๋ฉ”์‹œ์ง€ ์ œ๊ฑฐ |
198
+ | PUT | `/context/message/{message_id}` | ๋ฉ”์‹œ์ง€ ์ˆ˜์ • |
199
+ | GET | `/context/search` | ์ปจํ…์ŠคํŠธ ๊ฒ€์ƒ‰ |
200
+ | POST | `/context/export` | ์ปจํ…์ŠคํŠธ ๋‚ด๋ณด๋‚ด๊ธฐ |
201
+ | POST | `/context/import` | ์ปจํ…์ŠคํŠธ ๊ฐ€์ ธ์˜ค๊ธฐ |
202
+
203
+ ### LoRA ๊ด€๋ฆฌ
204
+
205
+ | ๋ฉ”์„œ๋“œ | ์—”๋“œํฌ์ธํŠธ | ์„ค๋ช… |
206
+ |--------|------------|------|
207
+ | POST | `/lora/load-base-model` | ๊ธฐ๋ณธ ๋ชจ๋ธ ๋กœ๋“œ |
208
+ | POST | `/lora/create-config` | LoRA ์„ค์ • ์ƒ์„ฑ |
209
+ | POST | `/lora/apply` | LoRA ์–ด๋Œ‘ํ„ฐ ์ ์šฉ |
210
+ | POST | `/lora/load-adapter` | ์ €์žฅ๋œ ์–ด๋Œ‘ํ„ฐ ๋กœ๋“œ |
211
+ | POST | `/lora/save-adapter` | ์–ด๋Œ‘ํ„ฐ ์ €์žฅ |
212
+ | GET | `/lora/adapters` | ์–ด๋Œ‘ํ„ฐ ๋ชฉ๋ก |
213
+ | GET | `/lora/stats` | ์–ด๋Œ‘ํ„ฐ ํ†ต๊ณ„ |
214
+ | POST | `/lora/switch` | ์–ด๋Œ‘ํ„ฐ ์ „ํ™˜ |
215
+ | POST | `/lora/unload` | ์–ด๋Œ‘ํ„ฐ ์–ธ๋กœ๋“œ |
216
+ | POST | `/lora/generate` | LoRA ๋ชจ๋ธ๋กœ ์ƒ์„ฑ |
217
+ | POST | `/lora/merge` | ์–ด๋Œ‘ํ„ฐ ๋ณ‘ํ•ฉ |
218
+
219
+ ## ๐Ÿ“Š ์„ฑ๋Šฅ ์ตœ์ ํ™”
220
+
221
+ ### 1. ๋ฉ”๋ชจ๋ฆฌ ๊ด€๋ฆฌ
222
+
223
+ - **์ปจํ…์ŠคํŠธ ์••์ถ•**: ์ž๋™ ๋ฉ”๋ชจ๋ฆฌ ์ตœ์ ํ™”
224
+ - **ํ† ํฐ ์ œํ•œ**: ์„ค์ • ๊ฐ€๋Šฅํ•œ ์ตœ๋Œ€ ํ† ํฐ ์ˆ˜
225
+ - **์„ธ์…˜ ๋ถ„๋ฆฌ**: ๋…๋ฆฝ์ ์ธ ๋ฉ”๋ชจ๋ฆฌ ๊ณต๊ฐ„
226
+
227
+ ### 2. LoRA ์ตœ์ ํ™”
228
+
229
+ - **๋žญํฌ ์กฐ์ •**: r ๊ฐ’์œผ๋กœ ์ •ํ™•๋„์™€ ํšจ์œจ์„ฑ ๊ท ํ˜•
230
+ - **ํƒ€๊ฒŸ ๋ชจ๋“ˆ ์„ ํƒ**: ํ•„์š”ํ•œ ๋ ˆ์ด์–ด๋งŒ ์„ ํƒ์  ํ›ˆ๋ จ
231
+ - **๊ทธ๋ž˜๋””์–ธํŠธ ์ฒดํฌํฌ์ธํŒ…**: ๋ฉ”๋ชจ๋ฆฌ ์‚ฌ์šฉ๋Ÿ‰ ๊ฐ์†Œ
232
+
233
+ ### 3. ํ•˜๋“œ์›จ์–ด ์ตœ์ ํ™”
234
+
235
+ - **GPU ๋ฉ”๋ชจ๋ฆฌ**: ํšจ์œจ์ ์ธ ๋ฉ”๋ชจ๋ฆฌ ํ• ๋‹น
236
+ - **CPU ์Šค๋ ˆ๋“œ**: ๋ฉ€ํ‹ฐ์Šค๋ ˆ๋”ฉ ์ตœ์ ํ™”
237
+ - **๋ฐฐ์น˜ ์ฒ˜๋ฆฌ**: ๋Œ€๋Ÿ‰ ๋ฐ์ดํ„ฐ ์ฒ˜๋ฆฌ ์ตœ์ ํ™”
238
+
239
+ ## ๐Ÿงช ํ…Œ์ŠคํŠธ
240
+
241
+ ### ํ…Œ์ŠคํŠธ ์Šคํฌ๋ฆฝํŠธ ์‹คํ–‰
242
+
243
+ ```bash
244
+ python test_context_lora.py
245
+ ```
246
+
247
+ ### ์ˆ˜๋™ ํ…Œ์ŠคํŠธ
248
+
249
+ ```bash
250
+ # ์„œ๋ฒ„ ์‹œ์ž‘
251
+ python run_server.py
252
+
253
+ # ๋‹ค๋ฅธ ํ„ฐ๋ฏธ๋„์—์„œ ํ…Œ์ŠคํŠธ
254
+ curl -X POST "http://localhost:8001/context/set-system-prompt" \
255
+ -d "prompt=๋‹น์‹ ์€ ํ•œ๊ตญ์–ด AI ์–ด์‹œ์Šคํ„ดํŠธ์ž…๋‹ˆ๋‹ค."
256
+
257
+ curl -X GET "http://localhost:8001/context/summary"
258
+ ```
259
+
260
+ ## ๐Ÿ”ง ์„ค์ • ์˜ต์…˜
261
+
262
+ ### ์ปจํ…์ŠคํŠธ ๊ด€๋ฆฌ์ž ์„ค์ •
263
+
264
+ ```python
265
+ # ContextManager ์ดˆ๊ธฐํ™” ์‹œ ์„ค์ •
266
+ context_manager = ContextManager(
267
+ max_tokens=4000, # ์ตœ๋Œ€ ํ† ํฐ ์ˆ˜
268
+ max_turns=20, # ์ตœ๋Œ€ ๋Œ€ํ™” ํ„ด ์ˆ˜
269
+ strategy="sliding_window" # ์••์ถ• ์ „๋žต
270
+ )
271
+ ```
272
+
273
+ ### LoRA ์„ค์ •
274
+
275
+ ```python
276
+ # LoRA ์„ค์ • ์˜ˆ์‹œ
277
+ lora_config = LoraConfig(
278
+ r=16, # LoRA ๋žญํฌ (๋†’์„์ˆ˜๋ก ์ •ํ™•๋„ ํ–ฅ์ƒ, ๋ฉ”๋ชจ๋ฆฌ ์ฆ๊ฐ€)
279
+ lora_alpha=32, # LoRA ์•ŒํŒŒ (์Šค์ผ€์ผ๋ง ํŒฉํ„ฐ)
280
+ target_modules=["q_proj", "v_proj", "k_proj", "o_proj"], # ํƒ€๊ฒŸ ๋ชจ๋“ˆ
281
+ lora_dropout=0.1, # ๋“œ๋กญ์•„์›ƒ ๋น„์œจ
282
+ bias="none", # ๋ฐ”์ด์–ด์Šค ์ฒ˜๋ฆฌ ๋ฐฉ์‹
283
+ task_type="CAUSAL_LM" # ์ž‘์—… ํƒ€์ž…
284
+ )
285
+ ```
286
+
287
+ ## ๐Ÿšจ ์ฃผ์˜์‚ฌํ•ญ
288
+
289
+ ### 1. ๋ฉ”๋ชจ๋ฆฌ ๊ด€๋ฆฌ
290
+ - **์ปจํ…์ŠคํŠธ ๊ธธ์ด**: ๋„ˆ๋ฌด ๊ธด ์ปจํ…์ŠคํŠธ๋Š” ๋ฉ”๋ชจ๋ฆฌ ๋ถ€์กฑ์„ ์•ผ๊ธฐํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค
291
+ - **LoRA ๋žญํฌ**: ๋†’์€ ๋žญํฌ๋Š” ์ •ํ™•๋„๋ฅผ ๋†’์ด์ง€๋งŒ ๋ฉ”๋ชจ๋ฆฌ ์‚ฌ์šฉ๋Ÿ‰๋„ ์ฆ๊ฐ€ํ•ฉ๋‹ˆ๋‹ค
292
+
293
+ ### 2. ์„ฑ๋Šฅ ๊ณ ๋ ค์‚ฌํ•ญ
294
+ - **GPU ๋ฉ”๋ชจ๋ฆฌ**: LoRA ํ›ˆ๋ จ ์‹œ ์ถฉ๋ถ„ํ•œ GPU ๋ฉ”๋ชจ๋ฆฌ๊ฐ€ ํ•„์š”ํ•ฉ๋‹ˆ๋‹ค
295
+ - **CPU ์‚ฌ์šฉ๋Ÿ‰**: ์ปจํ…์ŠคํŠธ ์••์ถ• ์‹œ CPU ๋ฆฌ์†Œ์Šค๊ฐ€ ์‚ฌ์šฉ๋ฉ๋‹ˆ๋‹ค
296
+
297
+ ### 3. ํ˜ธํ™˜์„ฑ
298
+ - **๋ชจ๋ธ ํƒ€์ž…**: ๋ชจ๋“  ๋ชจ๋ธ์ด LoRA๋ฅผ ์ง€์›ํ•˜์ง€ ์•Š์„ ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค
299
+ - **๋ฒ„์ „ ํ˜ธํ™˜์„ฑ**: PEFT์™€ Transformers ๋ฒ„์ „ ํ˜ธํ™˜์„ฑ์„ ํ™•์ธํ•˜์„ธ์š”
300
+
301
+ ## ๐Ÿ“š ์ถ”๊ฐ€ ์ž๋ฃŒ
302
+
303
+ ### ๊ด€๋ จ ๋ฌธ์„œ
304
+ - [PEFT ๊ณต์‹ ๋ฌธ์„œ](https://huggingface.co/docs/peft)
305
+ - [LoRA ๋…ผ๋ฌธ](https://arxiv.org/abs/2106.09685)
306
+ - [QLoRA ๋…ผ๋ฌธ](https://arxiv.org/abs/2305.14314)
307
+
308
+ ### ์˜ˆ์ œ ์ฝ”๋“œ
309
+ - `test_context_lora.py`: ํ†ตํ•ฉ ํ…Œ์ŠคํŠธ ์Šคํฌ๋ฆฝํŠธ
310
+ - `examples/`: ์ถ”๊ฐ€ ์‚ฌ์šฉ ์˜ˆ์ œ๋“ค
311
+
312
+ ### ์ปค๋ฎค๋‹ˆํ‹ฐ
313
+ - [Hugging Face PEFT](https://huggingface.co/docs/peft)
314
+ - [GitHub Issues](https://github.com/your-repo/issues)
315
+
316
+ ## ๐Ÿค ๊ธฐ์—ฌํ•˜๊ธฐ
317
+
318
+ ๋ฒ„๊ทธ ๋ฆฌํฌํŠธ, ๊ธฐ๋Šฅ ์ œ์•ˆ, ์ฝ”๋“œ ๊ธฐ์—ฌ๋ฅผ ํ™˜์˜ํ•ฉ๋‹ˆ๋‹ค!
319
+
320
+ 1. ์ด์Šˆ ์ƒ์„ฑ
321
+ 2. ํฌํฌ ํ›„ ๋ธŒ๋žœ์น˜ ์ƒ์„ฑ
322
+ 3. ๋ณ€๊ฒฝ์‚ฌํ•ญ ์ปค๋ฐ‹
323
+ 4. Pull Request ์ƒ์„ฑ
324
+
325
+ ## ๐Ÿ“„ ๋ผ์ด์„ ์Šค
326
+
327
+ ์ด ํ”„๋กœ์ ํŠธ๋Š” MIT ๋ผ์ด์„ ์Šค ํ•˜์— ๋ฐฐํฌ๋ฉ๋‹ˆ๋‹ค.
328
+
329
+ ---
330
+
331
+ **Lily LLM** - ๋” ์Šค๋งˆํŠธํ•œ AI ๋Œ€ํ™”๋ฅผ ์œ„ํ•œ ์ปจํ…์ŠคํŠธ ๊ด€๋ฆฌ ๋ฐ LoRA ์‹œ์Šคํ…œ ๐Ÿš€
download_kanana_model.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Download the Kanana vision-language model and save it locally.

Fetches ``kakaocorp/kanana-1.5-v-3b-instruct`` (processor + model) from the
Hugging Face Hub using the official Auto classes and writes both into
``SAVE_DIRECTORY`` so the server can later load them from disk.

Requires an ``HF_TOKEN`` entry in the environment / ``.env`` file if the
repository is gated; on any failure the full traceback is printed together
with a hint to check the Hugging Face login.
"""
import os
import traceback

import torch
from transformers import AutoModelForVision2Seq, AutoProcessor
from dotenv import load_dotenv

# --- Configuration ---
MODEL_NAME = "kakaocorp/kanana-1.5-v-3b-instruct"
SAVE_DIRECTORY = "./lily_llm_core/models/kanana_1_5_v_3b_instruct"
# --- End configuration ---

load_dotenv()
HF_TOKEN = os.getenv("HF_TOKEN")  # may be None; gated repos will then fail to download

print("=" * 60)
print(f"'{MODEL_NAME}' ๋ชจ๋ธ ๋ฐ ํ”„๋กœ์„ธ์„œ ๊ณต์‹ ๋‹ค์šด๋กœ๋“œ๋ฅผ ์‹œ์ž‘ํ•ฉ๋‹ˆ๋‹ค.")
print(f"์ €์žฅ ๊ฒฝ๋กœ: {SAVE_DIRECTORY}")
print("=" * 60)

try:
    # 1. Download the official AutoProcessor (tokenizer + image processor).
    print("\n[1/2] ํ”„๋กœ์„ธ์„œ(Tokenizer+Image Processor) ๋‹ค์šด๋กœ๋“œ ์ค‘...")
    processor = AutoProcessor.from_pretrained(
        MODEL_NAME,
        token=HF_TOKEN,
        trust_remote_code=True
    )
    print("โœ… ํ”„๋กœ์„ธ์„œ ๋‹ค์šด๋กœ๋“œ ์„ฑ๊ณต!")

    # 2. Download the official AutoModelForVision2Seq weights.
    print("\n[2/2] ๋ชจ๋ธ ๋‹ค์šด๋กœ๋“œ ์ค‘... (์‹œ๊ฐ„์ด ๊ฑธ๋ฆด ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค)")
    model = AutoModelForVision2Seq.from_pretrained(
        MODEL_NAME,
        token=HF_TOKEN,
        torch_dtype=torch.bfloat16,  # same dtype as the official example
        trust_remote_code=True
    )
    print("โœ… ๋ชจ๋ธ ๋‹ค์šด๋กœ๋“œ ์„ฑ๊ณต!")

    # 3. Save both model and processor to the local path.
    print(f"\n[+] '{SAVE_DIRECTORY}' ๊ฒฝ๋กœ์— ๋ชจ๋“  ํŒŒ์ผ ์ €์žฅ ์ค‘...")
    # exist_ok=True replaces the racy "check then create" pattern: it is safe
    # whether or not the directory (or part of its parents) already exists.
    os.makedirs(SAVE_DIRECTORY, exist_ok=True)

    processor.save_pretrained(SAVE_DIRECTORY)
    model.save_pretrained(SAVE_DIRECTORY)
    print("โœ… ๋ชจ๋ธ๊ณผ ํ”„๋กœ์„ธ์„œ ์ €์žฅ ์™„๋ฃŒ!")

    print("\n" + "=" * 60)
    print("๐ŸŽ‰ ๋ชจ๋“  ์ž‘์—…์ด ์„ฑ๊ณต์ ์œผ๋กœ ์™„๋ฃŒ๋˜์—ˆ์Šต๋‹ˆ๋‹ค!")
    print("์ด์ œ `kanana_1_5_v_3b_instruct.py` ํŒŒ์ผ์„ ์ˆ˜์ •ํ•˜๊ณ  ์„œ๋ฒ„๋ฅผ ์‹คํ–‰ํ•˜์„ธ์š”.")
    print("=" * 60)

except Exception as e:
    # Broad catch is intentional for a one-shot CLI script: report and exit.
    print(f"\nโŒ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {e}")
    traceback.print_exc()
    print("\nํ—ˆ๊น…ํŽ˜์ด์Šค ํ† ํฐ์ด ์˜ฌ๋ฐ”๋ฅธ์ง€, `huggingface-cli login`์„ ์‹คํ–‰ํ–ˆ๋Š”์ง€ ํ™•์ธํ•˜์„ธ์š”.")
lily_llm_api/app_v2.py CHANGED
@@ -21,6 +21,10 @@ import os
21
  import json
22
  from pathlib import Path
23
 
 
 
 
 
24
  from .models import get_model_profile, list_available_models
25
  from lily_llm_core.rag_processor import rag_processor
26
  from lily_llm_core.document_processor import document_processor
@@ -45,10 +49,21 @@ from lily_llm_core.vector_store_manager import vector_store_manager
45
  # from latex_ocr_faiss_simple import LatexOCRFAISSSimple
46
 
47
  # ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ RAG ํ”„๋กœ์„ธ์„œ ์ถ”๊ฐ€
48
- # from lily_llm_core.hybrid_rag_processor import hybrid_rag_processor
49
-
50
- logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
51
- logger = logging.getLogger(__name__)
 
 
 
 
 
 
 
 
 
 
 
52
 
53
  # FastAPI ์•ฑ ์ƒ์„ฑ
54
  app = FastAPI(
@@ -160,6 +175,7 @@ class TokenResponse(BaseModel):
160
  # ์ „์—ญ ๋ณ€์ˆ˜
161
  model = None
162
  tokenizer = None
 
163
  current_profile = None
164
  model_loaded = False
165
  image_processor = None
@@ -240,66 +256,40 @@ async def load_model_async(model_id: str):
240
  await loop.run_in_executor(executor, load_model_sync, model_id)
241
 
242
  def load_model_sync(model_id: str):
243
- """๋ชจ๋ธ ๋ฐ ๊ด€๋ จ ํ”„๋กœ์„ธ์„œ๋ฅผ ๋™๊ธฐ์ ์œผ๋กœ ๋กœ๋”ฉ"""
244
- global model, tokenizer, image_processor, current_profile
245
 
246
  try:
247
- # ๊ธฐ์กด ๋ชจ๋ธ ์–ธ๋กœ๋“œ (๋ฉ”๋ชจ๋ฆฌ ์ •๋ฆฌ)
248
  if model is not None:
249
  logger.info("๐Ÿ—‘๏ธ ๊ธฐ์กด ๋ชจ๋ธ ์–ธ๋กœ๋“œ ์ค‘...")
250
  del model
251
  del tokenizer
252
- model = None
253
- tokenizer = None
254
  import gc
255
  gc.collect()
256
  logger.info("โœ… ๊ธฐ์กด ๋ชจ๋ธ ์–ธ๋กœ๋“œ ์™„๋ฃŒ")
257
 
258
  logger.info(f"๐Ÿ“ฅ '{model_id}' ๋ชจ๋ธ ๋กœ๋”ฉ ์‹œ์ž‘...")
259
  current_profile = get_model_profile(model_id)
260
- model, tokenizer = current_profile.load_model()
261
 
262
- # ๋ชจ๋ธ ์ •๋ณด ๋””๋ฒ„๊ทธ
263
- if hasattr(model, 'num_parameters'):
264
- logger.info(f"๐Ÿ“Š ๋ชจ๋ธ ํŒŒ๋ผ๋ฏธํ„ฐ ์ˆ˜: {model.num_parameters():,}")
265
-
266
- # ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ํ† ํฌ๋‚˜์ด์ € ์„ค์ •
267
- if getattr(current_profile, 'multimodal', False):
268
- logger.info("๐Ÿ”ง ํ† ํฌ๋‚˜์ด์ € ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ๊ธฐ๋Šฅ ํ™œ์„ฑํ™”...")
269
- tokenizer.mllm_setup(num_visual_tokens=1)
270
- from transformers import AutoImageProcessor
271
-
272
- # ํ™˜๊ฒฝ์— ๋”ฐ๋ผ ์ด๋ฏธ์ง€ ํ”„๋กœ์„ธ์„œ ๋กœ๋”ฉ ๋ถ„๊ธฐ
273
- if current_profile.is_local:
274
- # ๋กœ์ปฌ ํ™˜๊ฒฝ: ๋กœ์ปฌ ๊ฒฝ๋กœ ์‚ฌ์šฉ
275
- logger.info("๐Ÿ  ๋กœ์ปฌ ํ™˜๊ฒฝ์—์„œ ์ด๋ฏธ์ง€ ํ”„๋กœ์„ธ์„œ ๋กœ๋”ฉ")
276
- image_processor = AutoImageProcessor.from_pretrained(
277
- current_profile.local_path,
278
- trust_remote_code=True,
279
- local_files_only=True,
280
- )
281
- else:
282
- # ์„œ๋ฒ„ ํ™˜๊ฒฝ: ๋ชจ๋ธ๋ช… ์‚ฌ์šฉ, HF ํ† ํฐ ํ•„์š”
283
- logger.info("โ˜๏ธ ์„œ๋ฒ„ ํ™˜๊ฒฝ์—์„œ ์ด๋ฏธ์ง€ ํ”„๋กœ์„ธ์„œ ๋กœ๋”ฉ")
284
- import os
285
- HF_TOKEN = os.getenv("HF_TOKEN")
286
- image_processor = AutoImageProcessor.from_pretrained(
287
- current_profile.model_name, # local_path ๋Œ€์‹  model_name ์‚ฌ์šฉ
288
- token=HF_TOKEN,
289
- trust_remote_code=True,
290
- local_files_only=False, # ์„œ๋ฒ„์—์„œ๋Š” False
291
- )
292
- logger.info("โœ… ์ด๋ฏธ์ง€ ํ”„๋กœ์„ธ์„œ ๋กœ๋“œ ์„ฑ๊ณต!")
293
  else:
294
- image_processor = None
295
-
 
296
  logger.info(f"โœ… '{current_profile.display_name}' ๋ชจ๋ธ ๋กœ๋”ฉ ์™„๋ฃŒ!")
297
 
298
  except Exception as e:
299
  logger.error(f"โŒ load_model_sync ์‹คํŒจ: {e}")
300
  import traceback
301
  logger.error(f"๐Ÿ” ์ „์ฒด ์—๋Ÿฌ: {traceback.format_exc()}")
302
- raise
303
 
304
  def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_length: Optional[int] = None,
305
  temperature: Optional[float] = None, top_p: Optional[float] = None,
@@ -474,324 +464,172 @@ def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_lengt
474
  logger.error(f"โŒ ์‘๋‹ต ์ถ”์ถœ ์ค‘ ์˜ค๋ฅ˜: {e}")
475
  raise HTTPException(status_code=500, detail=f"์‘๋‹ต ์ถ”์ถœ ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {str(e)}")
476
 
 
 
 
 
 
 
 
 
 
 
477
  @app.post("/generate", response_model=GenerateResponse)
478
  async def generate(prompt: str = Form(...),
479
  image1: UploadFile = File(None),
480
  image2: UploadFile = File(None),
481
  image3: UploadFile = File(None),
482
  image4: UploadFile = File(None),
483
- max_length: Optional[int] = Form(None),
484
- temperature: Optional[float] = Form(None),
485
- top_p: Optional[float] = Form(None),
486
- do_sample: Optional[bool] = Form(None)):
487
- """[ํ†ตํ•ฉ ์—”๋“œํฌ์ธํŠธ] ํ…์ŠคํŠธ ๋ฐ ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ์ƒ์„ฑ"""
488
  if not model_loaded:
489
  raise HTTPException(status_code=503, detail="๋ชจ๋ธ์ด ๋กœ๋“œ๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค.")
490
 
491
  start_time = time.time()
492
- loop = asyncio.get_event_loop()
493
-
494
- # ๋‹ค์ค‘ ์ด๋ฏธ์ง€ ์ฒ˜๋ฆฌ
495
- image_data_list = []
496
- for img in [image1, image2, image3, image4]:
497
- if img:
498
- image_data = await img.read()
499
- image_data_list.append(image_data)
500
-
501
- # ๋‹จ์ผ ์‹คํ–‰ ๋ณด์žฅ: generate_sync๋Š” ์˜ค์ง ํ•œ ๋ฒˆ๋งŒ ํ˜ธ์ถœ
502
- result = await loop.run_in_executor(
503
- executor,
504
- generate_sync,
505
- prompt,
506
- image_data_list,
507
- max_length,
508
- temperature,
509
- top_p,
510
- do_sample,
511
- )
512
-
513
- processing_time = time.time() - start_time
514
- logger.info(f"โœ… ์ƒ์„ฑ ์™„๋ฃŒ ({processing_time:.2f}์ดˆ), ์ด๋ฏธ์ง€ ์ฒ˜๋ฆฌ: {result['image_processed']}")
515
-
516
- return GenerateResponse(
517
- generated_text=result["text"],
518
- processing_time=processing_time,
519
- model_name=current_profile.display_name,
520
- image_processed=result["image_processed"]
521
- )
522
 
523
- @app.post("/generate-multimodal", response_model=MultimodalGenerateResponse)
524
- async def generate_multimodal(
525
- prompt: str = Form(...),
526
- image: UploadFile = File(None),
527
- model_id: Optional[str] = Form(None),
528
- max_length: Optional[int] = Form(None),
529
- temperature: Optional[float] = Form(None),
530
- top_p: Optional[float] = Form(None),
531
- do_sample: Optional[bool] = Form(None)
532
- ):
533
- """๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ํ…์ŠคํŠธ ์ƒ์„ฑ (์ด๋ฏธ์ง€ + ํ…์ŠคํŠธ)"""
534
- start_time = time.time()
535
-
536
- try:
537
- # ๋ชจ๋ธ ๋กœ๋“œ ํ™•์ธ
538
- if not model_loaded or not model or not tokenizer or not current_profile:
539
- raise HTTPException(status_code=500, detail="๋ชจ๋ธ์ด ๋กœ๋“œ๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค")
540
-
541
- # Kanana ๋ชจ๋ธ์ด ์•„๋‹ˆ๋ฉด ์ผ๋ฐ˜ ํ…์ŠคํŠธ ์ƒ์„ฑ์œผ๋กœ ๋ฆฌ๋‹ค์ด๋ ‰ํŠธ
542
- if "kanana" not in current_profile.model_name.lower():
543
- logger.warning("๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ์š”์ฒญ์ด์ง€๋งŒ Kanana ๋ชจ๋ธ์ด ์•„๋‹˜ - ์ผ๋ฐ˜ ํ…์ŠคํŠธ ์ƒ์„ฑ์œผ๋กœ ์ฒ˜๋ฆฌ")
544
- # ์ผ๋ฐ˜ generate ์—”๋“œํฌ์ธํŠธ๋กœ ๋ฆฌ๋‹ค์ด๋ ‰ํŠธ
545
- loop = asyncio.get_event_loop()
546
- result = await loop.run_in_executor(executor, generate_sync, prompt, None,
547
- max_length, temperature, top_p, do_sample)
548
- return MultimodalGenerateResponse(
549
- generated_text=result["text"],
550
- processing_time=time.time() - start_time,
551
- model_name=current_profile.display_name,
552
- model_id=model_id or current_profile.get_model_info()["model_name"],
553
- image_processed=False
554
- )
555
-
556
- logger.info(f"๐Ÿ–ผ๏ธ ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ์ƒ์„ฑ ์‹œ์ž‘: '{prompt}'")
557
-
558
- # ์ด๋ฏธ์ง€ ์ฒ˜๋ฆฌ
559
- pixel_values = None
560
- image_metas = None
561
- image_processed = False
562
- if image:
563
- logger.info(f"๐Ÿ“ธ ์ด๋ฏธ์ง€ ์ฒ˜๋ฆฌ ์ค‘: {image.filename}")
564
  try:
565
- # ์ด๋ฏธ์ง€ ํŒŒ์ผ ์ฝ๊ธฐ
566
- image_data = await image.read()
567
- pil_image = Image.open(io.BytesIO(image_data))
568
-
569
- # ์ด๋ฏธ์ง€ ์ „์ฒ˜๋ฆฌ (Kanana ๋ชจ๋ธ์— ๋งž๊ฒŒ)
570
- logger.info(f"โœ… ์ด๋ฏธ์ง€ ๋กœ๋“œ ์™„๋ฃŒ: {pil_image.size}")
571
-
572
- # Kanana ๋ชจ๋ธ์˜ ์ด๋ฏธ์ง€ ์ „์ฒ˜๋ฆฌ
573
- from transformers import AutoImageProcessor
574
-
575
- # ์ด๋ฏธ์ง€ ํ”„๋กœ์„ธ์„œ ๋กœ๋“œ
576
- image_processor = AutoImageProcessor.from_pretrained(
577
- current_profile.get_model_info()["local_path"],
578
- trust_remote_code=True
579
- )
580
-
581
- # ์ด๋ฏธ์ง€ ์ „์ฒ˜๋ฆฌ (Kanana ๋ฐฉ์‹)
582
- processed_images = image_processor(pil_image)
583
- pixel_values = processed_images["pixel_values"]
584
- image_metas = processed_images["image_meta"]
585
-
586
- # ๋””๋ฐ”์ด์Šค๋กœ ์ด๋™
587
- pixel_values = pixel_values.to(model.device)
588
- if image_metas and "vision_grid_thw" in image_metas:
589
- # vision_grid_thw๊ฐ€ ์Šค์นผ๋ผ๊ฐ€ ์•„๋‹Œ ํ…์„œ์ธ์ง€ ํ™•์ธ
590
- grid_thw = image_metas["vision_grid_thw"]
591
- if isinstance(grid_thw, (list, tuple)):
592
- grid_thw = torch.tensor(grid_thw)
593
- elif not isinstance(grid_thw, torch.Tensor):
594
- grid_thw = torch.tensor([grid_thw])
595
- image_metas["vision_grid_thw"] = grid_thw.to(model.device)
596
-
597
- # ๋””๋ฒ„๊น…์„ ์œ„ํ•œ ๋กœ๊ทธ ์ถ”๊ฐ€
598
- logger.info(f"๐Ÿ” pixel_values ํ˜•ํƒœ: {pixel_values.shape}")
599
- logger.info(f"๐Ÿ” image_metas keys: {list(image_metas.keys()) if image_metas else 'None'}")
600
- if image_metas and "vision_grid_thw" in image_metas:
601
- logger.info(f"๐Ÿ” vision_grid_thw ํ˜•ํƒœ: {image_metas['vision_grid_thw'].shape}")
602
-
603
- image_processed = True
604
- logger.info(f"โœ… ์ด๋ฏธ์ง€ ํ…์„œ ๋ณ€ํ™˜ ์™„๋ฃŒ: {pixel_values.shape}")
605
-
606
  except Exception as e:
607
- logger.error(f"โŒ ์ด๋ฏธ์ง€ ์ฒ˜๋ฆฌ ์‹คํŒจ: {e}")
608
- pixel_values = None
609
- image_metas = None
610
- image_processed = False
611
- logger.info("๐Ÿ“ ์ด๋ฏธ์ง€ ์—†์ด ํ…์ŠคํŠธ๋งŒ ์ฒ˜๋ฆฌํ•ฉ๋‹ˆ๋‹ค.")
612
- else:
613
- logger.info("๐Ÿ“ธ ์ด๋ฏธ์ง€ ์—†์Œ - ํ…์ŠคํŠธ๋งŒ ์ฒ˜๋ฆฌ")
614
- image_processed = False
615
-
616
- # Kanana ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ํ”„๋กฌํ”„ํŠธ ํ˜•์‹
617
- system_prompt = "๋‹น์‹ ์€ ํ•œ๊ตญ์–ด๋กœ ๋Œ€ํ™”ํ•˜๋Š” AI ์–ด์‹œ์Šคํ„ดํŠธ์ž…๋‹ˆ๋‹ค. ๋ชจ๋“  ์‘๋‹ต์€ ํ•œ๊ตญ์–ด๋กœ ํ•ด์ฃผ์„ธ์š”."
618
- if image_processed:
619
- formatted_prompt = f"<|im_start|>system\n{system_prompt}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n"
620
- logger.info(f"๐Ÿ–ผ๏ธ Kanana ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ํ”„๋กฌํ”„ํŠธ: '{formatted_prompt}'")
621
  else:
622
- formatted_prompt = f"<|im_start|>system\n{system_prompt}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n"
623
- logger.info(f"๐Ÿ” Kanana ํ…์ŠคํŠธ ์ „์šฉ ํ”„๋กฌํ”„ํŠธ: '{formatted_prompt}'")
624
-
625
- # ํ† ํฌ๋‚˜์ด์ง•
626
- inputs = tokenizer(
627
- formatted_prompt,
628
- return_tensors="pt",
629
- padding=True,
630
- truncation=True,
631
- max_length=100,
632
- )
633
-
634
- if 'token_type_ids' in inputs:
635
- del inputs['token_type_ids']
636
-
637
- inputs = {k: v.to(model.device) for k, v in inputs.items()}
638
- logger.info(f"์ž…๋ ฅ ํ† ํฐ ์ˆ˜: {inputs['input_ids'].shape[1]}")
639
-
640
- # Kanana์šฉ ์ƒ์„ฑ ์„ค์ •
641
- max_new_tokens = max_length or 100
642
- temperature = temperature or 0.7
643
- top_p = top_p or 0.9
644
- do_sample = do_sample if do_sample is not None else True
645
-
646
- with torch.no_grad():
647
- if image_processed and pixel_values is not None:
648
- # Kanana ๋ชจ๋ธ์˜ ์‹ค์ œ ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ์ฒ˜๋ฆฌ ์‹œ๋„
649
- logger.info("๐Ÿ” Kanana ๋ชจ๋ธ์˜ ์‹ค์ œ ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ์ฒ˜๋ฆฌ ์‹œ๋„...")
650
-
651
- try:
652
- # vision_grid_thw๋ฅผ ์˜ฌ๋ฐ”๋ฅธ ํ˜•ํƒœ๋กœ ๋ณ€ํ™˜ ์‹œ๋„
653
- if 'vision_grid_thw' in image_metas:
654
- grid_thw = image_metas['vision_grid_thw']
655
- if isinstance(grid_thw, (list, tuple)):
656
- grid_thw = torch.tensor(grid_thw)
657
- elif not isinstance(grid_thw, torch.Tensor):
658
- grid_thw = torch.tensor([grid_thw])
659
- image_metas['vision_grid_thw'] = grid_thw.to(model.device)
660
-
661
- # forward_vision๊ณผ forward_projector๋ฅผ ๋ถ„๋ฆฌํ•ด์„œ ์‹œ๋„
662
- visual_features = model.forward_vision(pixel_values, image_metas)
663
- visual_embeds = model.forward_projector(visual_features, image_metas)
664
-
665
- # ํ…์ŠคํŠธ ์ž„๋ฒ ๋”ฉ ์ƒ์„ฑ
666
- text_embeds = model.embed_text_tokens(inputs["input_ids"])
667
-
668
- # ์‹œ๊ฐ์  ์ž„๋ฒ ๋”ฉ์„ ํ…์ŠคํŠธ ์ž„๋ฒ ๋”ฉ๊ณผ ๊ฐ™์€ dtype์œผ๋กœ ๋ณ€ํ™˜
669
- target_dtype = text_embeds.dtype
670
- visual_embeds_converted = visual_embeds.to(target_dtype)
671
-
672
- # ํ…์ŠคํŠธ ์ž„๋ฒ ๋”ฉ์„ ํ‰๋ฉดํ™”
673
- from einops import rearrange
674
- flattened_text_embeds = rearrange(text_embeds, "b l d -> (b l) d")
675
- flattened_input_ids = rearrange(inputs["input_ids"], "b l -> (b l)")
676
-
677
- # -1 ํ† ํฐ ์œ„์น˜์— ์‹œ๊ฐ์  ์ž„๋ฒ ๋”ฉ ์‚ฝ์ž…
678
- mask = (flattened_input_ids == -1)
679
- if mask.sum() > 0:
680
- flattened_text_embeds[mask] = visual_embeds_converted[:mask.sum()]
681
-
682
- # ๋‹ค์‹œ ๋ฐฐ์น˜ ํ˜•ํƒœ๋กœ ์žฌ๊ตฌ์„ฑ
683
- input_embeds = rearrange(flattened_text_embeds, "(b l) d -> b l d", b=inputs["input_ids"].shape[0])
684
- attention_mask = inputs["attention_mask"]
685
-
686
- # ์–ธ์–ด ๋ชจ๋ธ์˜ dtype์— ๋งž์ถฐ input_embeds ๋ณ€ํ™˜
687
- language_model_dtype = next(model.language_model.parameters()).dtype
688
- if input_embeds.dtype != language_model_dtype:
689
- input_embeds = input_embeds.to(language_model_dtype)
690
-
691
- # Kanana ๋ชจ๋ธ์˜ ์›๋ž˜ generate ๋ฉ”์„œ๋“œ ์‚ฌ์šฉ
692
- outputs = model.generate(
693
- input_ids=inputs["input_ids"],
694
- pixel_values=pixel_values,
695
- image_metas=image_metas,
696
- attention_mask=inputs["attention_mask"],
697
- max_new_tokens=max_new_tokens,
698
- do_sample=do_sample,
699
- temperature=temperature,
700
- top_k=40,
701
- top_p=top_p,
702
- repetition_penalty=1.1,
703
- no_repeat_ngram_size=2,
704
- pad_token_id=tokenizer.eos_token_id,
705
- eos_token_id=tokenizer.eos_token_id,
706
- use_cache=True
707
- )
708
- logger.info("โœ… ์‹ค์ œ ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ์ƒ์„ฑ ์„ฑ๊ณต!")
709
-
710
- except Exception as e:
711
- logger.error(f"โŒ ์‹ค์ œ ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ์ฒ˜๋ฆฌ ์‹คํŒจ: {e}")
712
- logger.error(f"๐Ÿ” ์˜ค๋ฅ˜ ํƒ€์ž…: {type(e).__name__}")
713
- import traceback
714
- logger.error(f"๐Ÿ” ์ƒ์„ธ ์˜ค๋ฅ˜: {traceback.format_exc()}")
715
- logger.info("๐Ÿ”„ fallback: ํ…์ŠคํŠธ ์ „์šฉ ์ฒ˜๋ฆฌ๋กœ ์ „ํ™˜")
716
-
717
- # fallback: ํ…์ŠคํŠธ ์ „์šฉ ์ฒ˜๋ฆฌ
718
- enhanced_prompt = f"[์ด๋ฏธ์ง€ ์ฒจ๋ถ€๋จ] {prompt}"
719
- enhanced_formatted_prompt = f"<|im_start|>system\n{system_prompt}<|im_end|>\n<|im_start|>user\n{enhanced_prompt}<|im_end|>\n<|im_start|>assistant\n"
720
-
721
- enhanced_inputs = tokenizer(
722
- enhanced_formatted_prompt,
723
- return_tensors="pt",
724
- padding=True,
725
- truncation=True,
726
- max_length=256
727
- )
728
-
729
- if 'token_type_ids' in enhanced_inputs:
730
- del enhanced_inputs['token_type_ids']
731
-
732
- enhanced_inputs = {k: v.to(model.device) for k, v in enhanced_inputs.items()}
733
-
734
- outputs = model.language_model.generate(
735
- input_ids=enhanced_inputs["input_ids"],
736
- attention_mask=enhanced_inputs["attention_mask"],
737
- max_new_tokens=max_new_tokens,
738
- do_sample=do_sample,
739
- temperature=temperature,
740
- top_k=40,
741
- top_p=top_p,
742
- repetition_penalty=1.1,
743
- no_repeat_ngram_size=2,
744
- pad_token_id=tokenizer.eos_token_id,
745
- eos_token_id=tokenizer.eos_token_id,
746
- use_cache=True
747
- )
748
  else:
749
- # ํ…์ŠคํŠธ ์ „์šฉ ์ƒ์„ฑ
750
- logger.info("๐Ÿ“ ํ…์ŠคํŠธ ์ „์šฉ ์ƒ์„ฑ ์‹œ์ž‘")
751
- outputs = model.generate(
752
- input_ids=inputs["input_ids"],
753
- attention_mask=inputs["attention_mask"],
754
- max_new_tokens=max_new_tokens,
755
- do_sample=do_sample,
756
- temperature=temperature,
757
- top_k=40,
758
- top_p=top_p,
759
- repetition_penalty=1.1,
760
- no_repeat_ngram_size=2,
761
- pad_token_id=tokenizer.eos_token_id,
762
- eos_token_id=tokenizer.eos_token_id,
763
- use_cache=True
764
- )
765
-
766
- # ์‘๋‹ต ์ถ”์ถœ
767
- generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
768
- logger.info(f"์ƒ์„ฑ๋œ ํ† ํฐ ์ˆ˜: {outputs.shape[1]}")
769
- logger.info(f"๋””์ฝ”๋”ฉ๋œ ์ „์ฒด ํ…์ŠคํŠธ: '{generated_text}'")
770
-
771
- # Kanana ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ์‘๋‹ต ์ถ”์ถœ
772
  if "<|im_start|>assistant" in generated_text:
773
  response = generated_text.split("<|im_start|>assistant")[-1].split("<|im_end|>")[0].strip()
774
- logger.info(f"๐Ÿ” Kanana ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ์‘๋‹ต ์ถ”์ถœ: '{response}'")
775
  else:
776
  response = generated_text.strip()
777
- logger.info(f"๐Ÿ” Kanana ์ „์ฒด ํ…์ŠคํŠธ: '{response}'")
778
-
779
  processing_time = time.time() - start_time
780
- logger.info(f"๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ์ƒ์„ฑ ์™„๋ฃŒ: {processing_time:.2f}์ดˆ, ํ…์ŠคํŠธ ๊ธธ์ด: {len(response)}, ์ด๋ฏธ์ง€ ์ฒ˜๋ฆฌ: {image_processed}")
781
-
782
- return MultimodalGenerateResponse(
783
- generated_text=response,
784
- processing_time=processing_time,
785
- model_name=current_profile.display_name,
786
- model_id=model_id or current_profile.get_model_info()["model_name"],
787
- image_processed=image_processed
788
- )
789
-
790
  except Exception as e:
791
- processing_time = time.time() - start_time
792
- logger.error(f"โŒ ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ์ƒ์„ฑ ์˜ค๋ฅ˜: {e} (์†Œ์š” ์‹œ๊ฐ„: {processing_time:.2f}์ดˆ)")
793
  raise HTTPException(status_code=500, detail=f"๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ์ƒ์„ฑ ์‹คํŒจ: {str(e)}")
794
 
 
 
 
795
  @app.get("/models")
796
  async def list_models():
797
  """์‚ฌ์šฉ ๊ฐ€๋Šฅํ•œ ๋ชจ๋ธ ๋ชฉ๋ก"""
@@ -2106,6 +1944,462 @@ async def get_latex_ocr_faiss_status():
2106
  logger.error(f"์ƒํƒœ ํ™•์ธ ์˜ค๋ฅ˜: {e}")
2107
  return {"status": "error", "error": str(e)}
2108
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2109
  # ============================================================================
2110
  # ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ RAG ์‹œ์Šคํ…œ ์—”๋“œํฌ์ธํŠธ
2111
  # ============================================================================
@@ -2168,7 +2462,7 @@ async def generate_hybrid_rag_response(
2168
  use_text: bool = Form(True),
2169
  use_image: bool = Form(True),
2170
  use_latex: bool = Form(True),
2171
- use_latex_ocr: bool = Form(True),
2172
  max_length: Optional[int] = Form(None),
2173
  temperature: Optional[float] = Form(None),
2174
  top_p: Optional[float] = Form(None),
@@ -2220,7 +2514,7 @@ async def get_hybrid_rag_status():
2220
  "text_rag_available": True,
2221
  "image_rag_available": True,
2222
  "latex_rag_available": True,
2223
- "latex_ocr_faiss_available": hybrid_rag_processor.latex_ocr_faiss_integrated is not None,
2224
  "status": "ready"
2225
  }
2226
  except Exception as e:
 
21
  import json
22
  from pathlib import Path
23
 
24
+ # logging ์„ค์ •์„ ๋จผ์ € ๊ตฌ์„ฑ
25
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
26
+ logger = logging.getLogger(__name__)
27
+
28
  from .models import get_model_profile, list_available_models
29
  from lily_llm_core.rag_processor import rag_processor
30
  from lily_llm_core.document_processor import document_processor
 
49
  # from latex_ocr_faiss_simple import LatexOCRFAISSSimple
50
 
51
  # ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ RAG ํ”„๋กœ์„ธ์„œ ์ถ”๊ฐ€
52
+ from lily_llm_core.hybrid_rag_processor import hybrid_rag_processor
53
+
54
+ # ์ปจํ…์ŠคํŠธ ๊ด€๋ฆฌ์ž ๋ฐ LoRA ๊ด€๋ฆฌ์ž ์ถ”๊ฐ€
55
+ from lily_llm_core.context_manager import get_context_manager, context_manager
56
+
57
+ # LoRA ๊ด€๋ฆฌ์ž import (์„ ํƒ์ )
58
+ try:
59
+ from lily_llm_core.lora_manager import get_lora_manager, lora_manager
60
+ LORA_AVAILABLE = True
61
+ logger.info("โœ… LoRA ๊ด€๋ฆฌ์ž import ์„ฑ๊ณต")
62
+ except ImportError as e:
63
+ logger.warning(f"โš ๏ธ LoRA ๊ด€๋ฆฌ์ž import ์‹คํŒจ: {e}")
64
+ LORA_AVAILABLE = False
65
+ lora_manager = None
66
+ get_lora_manager = None
67
 
68
  # FastAPI ์•ฑ ์ƒ์„ฑ
69
  app = FastAPI(
 
175
  # ์ „์—ญ ๋ณ€์ˆ˜
176
  model = None
177
  tokenizer = None
178
+ processor = None
179
  current_profile = None
180
  model_loaded = False
181
  image_processor = None
 
256
  await loop.run_in_executor(executor, load_model_sync, model_id)
257
 
258
  def load_model_sync(model_id: str):
259
+ """๋ชจ๋ธ ๋ฐ ๊ด€๋ จ ํ”„๋กœ์„ธ์„œ๋ฅผ ๋™๊ธฐ์ ์œผ๋กœ ๋กœ๋”ฉ (์ตœ์ข… ์ˆ˜์ •๋ณธ)"""
260
+ global model, tokenizer, processor, current_profile
261
 
262
  try:
 
263
  if model is not None:
264
  logger.info("๐Ÿ—‘๏ธ ๊ธฐ์กด ๋ชจ๋ธ ์–ธ๋กœ๋“œ ์ค‘...")
265
  del model
266
  del tokenizer
267
+ del processor
268
+ model, tokenizer, processor = None, None, None
269
  import gc
270
  gc.collect()
271
  logger.info("โœ… ๊ธฐ์กด ๋ชจ๋ธ ์–ธ๋กœ๋“œ ์™„๋ฃŒ")
272
 
273
  logger.info(f"๐Ÿ“ฅ '{model_id}' ๋ชจ๋ธ ๋กœ๋”ฉ ์‹œ์ž‘...")
274
  current_profile = get_model_profile(model_id)
 
275
 
276
+ # ์ด์ œ load_model์€ (model, processor)๋ฅผ ๋ฐ˜ํ™˜ํ•ฉ๋‹ˆ๋‹ค.
277
+ model, processor = current_profile.load_model()
278
+
279
+ # processor์—์„œ tokenizer๋ฅผ ๊บผ๋‚ด ์ „์—ญ ๋ณ€์ˆ˜์— ํ• ๋‹นํ•ฉ๋‹ˆ๋‹ค.
280
+ if hasattr(processor, 'tokenizer'):
281
+ tokenizer = processor.tokenizer
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
282
  else:
283
+ # processor ์ž์ฒด๊ฐ€ tokenizer ์—ญํ• ๋„ ํ•  ์ˆ˜ ์žˆ๋Š” ๊ฒฝ์šฐ
284
+ tokenizer = processor
285
+
286
  logger.info(f"โœ… '{current_profile.display_name}' ๋ชจ๋ธ ๋กœ๋”ฉ ์™„๋ฃŒ!")
287
 
288
  except Exception as e:
289
  logger.error(f"โŒ load_model_sync ์‹คํŒจ: {e}")
290
  import traceback
291
  logger.error(f"๐Ÿ” ์ „์ฒด ์—๋Ÿฌ: {traceback.format_exc()}")
292
+ raise
293
 
294
  def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_length: Optional[int] = None,
295
  temperature: Optional[float] = None, top_p: Optional[float] = None,
 
464
  logger.error(f"โŒ ์‘๋‹ต ์ถ”์ถœ ์ค‘ ์˜ค๋ฅ˜: {e}")
465
  raise HTTPException(status_code=500, detail=f"์‘๋‹ต ์ถ”์ถœ ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {str(e)}")
466
 
467
+ # Patched version of /generate and /generate-multimodal with manual greedy decoding loop for Kanana
468
+
469
+ from fastapi import HTTPException, UploadFile, File, Form
470
+ from PIL import Image
471
+ import io
472
+ import time
473
+ import torch
474
+ from typing import Optional
475
+
476
+
477
  @app.post("/generate", response_model=GenerateResponse)
478
  async def generate(prompt: str = Form(...),
479
  image1: UploadFile = File(None),
480
  image2: UploadFile = File(None),
481
  image3: UploadFile = File(None),
482
  image4: UploadFile = File(None),
483
+ use_context: bool = Form(True),
484
+ session_id: str = Form(None)):
485
+ global model_loaded, current_profile, model, tokenizer, processor
486
+
 
487
  if not model_loaded:
488
  raise HTTPException(status_code=503, detail="๋ชจ๋ธ์ด ๋กœ๋“œ๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค.")
489
 
490
  start_time = time.time()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
491
 
492
+ if use_context:
493
+ context_manager.add_user_message(prompt, metadata={"session_id": session_id})
494
+
495
+ pil_images = []
496
+ for img_file in [image1, image2, image3, image4]:
497
+ if img_file:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
498
  try:
499
+ data = await img_file.read()
500
+ pil = Image.open(io.BytesIO(data)).convert("RGB")
501
+ pil_images.append(pil)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
502
  except Exception as e:
503
+ logger.warning(f"์ด๋ฏธ์ง€ ๋กœ๋“œ ์‹คํŒจ: {e}")
504
+
505
+ try:
506
+ if getattr(current_profile, 'multimodal', False) and pil_images:
507
+ image_tokens = " ".join(["<image>"] * len(pil_images))
508
+ sample = {"image": pil_images,
509
+ "conv": [{"role": "user", "content": image_tokens},
510
+ {"role": "user", "content": prompt}]}
511
+ inputs = processor.batch_encode_collate([sample], padding_side="left", add_generation_prompt=True)
512
+ inputs = {k: (v.to(model.device) if isinstance(v, torch.Tensor) else v) for k, v in inputs.items()}
 
 
 
 
513
  else:
514
+ formatted_prompt = current_profile.format_prompt(prompt) if hasattr(current_profile, 'format_prompt') else prompt
515
+ sample = {"image": [], "conv": [{"role": "user", "content": formatted_prompt}]}
516
+ inputs = processor.batch_encode_collate([sample], padding_side='left', add_generation_prompt=True)
517
+ inputs = {k: (v.to(model.device) if isinstance(v, torch.Tensor) else v) for k, v in inputs.items()}
518
+
519
+ eot_id = tokenizer.convert_tokens_to_ids("<|eot_id|>")
520
+
521
+ # Manual greedy decoding loop
522
+ generated = inputs["input_ids"].clone()
523
+ for _ in range(64):
524
+ with torch.no_grad():
525
+ out = model(**inputs)
526
+ next_token = out.logits[:, -1, :].argmax(dim=-1, keepdim=True)
527
+ generated = torch.cat([generated, next_token], dim=-1)
528
+ logger.info(f"Step token: {next_token.item()}")
529
+ if next_token.item() == eot_id:
530
+ break
531
+ inputs["input_ids"] = generated
532
+
533
+ logger.info(f"Final Generated IDs: {generated[0].tolist()}")
534
+
535
+ generated_text = tokenizer.decode(generated[0], skip_special_tokens=True)
536
+ response = current_profile.extract_response(generated_text, prompt)
537
+
538
+ if use_context:
539
+ context_manager.add_assistant_message(response, metadata={"session_id": session_id})
540
+
541
+ processing_time = time.time() - start_time
542
+ return GenerateResponse(generated_text=response,
543
+ processing_time=processing_time,
544
+ model_name=current_profile.display_name,
545
+ image_processed=len(pil_images) > 0)
546
+ except Exception as e:
547
+ logger.error(f"โŒ ์ƒ์„ฑ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {e}", exc_info=True)
548
+ raise HTTPException(status_code=500, detail=f"๋ชจ๋ธ ์ƒ์„ฑ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}")
549
+
550
+
551
+ @app.post("/generate-multimodal", response_model=MultimodalGenerateResponse)
552
+ async def generate_multimodal(prompt: str = Form(...),
553
+ image: UploadFile = File(None),
554
+ model_id: Optional[str] = Form(None),
555
+ max_length: Optional[int] = Form(None),
556
+ temperature: Optional[float] = Form(None),
557
+ top_p: Optional[float] = Form(None),
558
+ do_sample: Optional[bool] = Form(None)):
559
+ global model_loaded, current_profile, model, tokenizer, processor
560
+
561
+ if not model_loaded:
562
+ raise HTTPException(status_code=500, detail="๋ชจ๋ธ์ด ๋กœ๋“œ๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค")
563
+
564
+ start_time = time.time()
565
+
566
+ pil_image = None
567
+ if image:
568
+ try:
569
+ data = await image.read()
570
+ pil_image = Image.open(io.BytesIO(data)).convert("RGB")
571
+ except Exception as e:
572
+ logger.error(f"์ด๋ฏธ์ง€ ์ฒ˜๋ฆฌ ์‹คํŒจ: {e}")
573
+
574
+ try:
575
+ image_list = [pil_image] if pil_image else []
576
+ image_tokens = " ".join(["<image>"] * len(image_list)) if image_list else ""
577
+ conv = []
578
+ if image_list:
579
+ conv.append({"role": "user", "content": image_tokens})
580
+ conv.append({"role": "user", "content": prompt})
581
+
582
+ logger.info("=== STEP 1: building sample ===")
583
+ sample = {"image": [], "conv": [{"role": "user", "content": prompt}]}
584
+
585
+ logger.info("=== STEP 2: calling processor ===")
586
+ inputs = processor.batch_encode_collate([sample], padding_side='left', add_generation_prompt=True)
587
+ logger.info("=== STEP 3: processor returned ===")
588
+
589
+ for k, v in inputs.items():
590
+ if isinstance(v, torch.Tensor):
591
+ logger.info(f"Key {k}: tensor shape {v.shape}, dtype {v.dtype}, device {v.device}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
592
  else:
593
+ logger.info(f"Key {k}: {type(v)}")
594
+
595
+ logger.info("=== STEP 4: moving to device ===")
596
+ inputs = {k: (v.to(model.device) if isinstance(v, torch.Tensor) else v) for k, v in inputs.items()}
597
+ logger.info("=== STEP 5: moved to device ===")
598
+ eot_id = tokenizer.convert_tokens_to_ids("<|eot_id|>")
599
+
600
+ # Manual greedy decoding loop
601
+ generated = inputs["input_ids"].clone()
602
+ for _ in range(64):
603
+ with torch.no_grad():
604
+ out = model(**inputs)
605
+ next_token = out.logits[:, -1, :].argmax(dim=-1, keepdim=True)
606
+ generated = torch.cat([generated, next_token], dim=-1)
607
+ logger.info(f"Step token: {next_token.item()}")
608
+ if next_token.item() == eot_id:
609
+ break
610
+ inputs["input_ids"] = generated
611
+
612
+ logger.info(f"Final Generated IDs: {generated[0].tolist()}")
613
+
614
+ generated_text = tokenizer.decode(generated[0], skip_special_tokens=True)
 
615
  if "<|im_start|>assistant" in generated_text:
616
  response = generated_text.split("<|im_start|>assistant")[-1].split("<|im_end|>")[0].strip()
 
617
  else:
618
  response = generated_text.strip()
619
+
 
620
  processing_time = time.time() - start_time
621
+ return MultimodalGenerateResponse(generated_text=response,
622
+ processing_time=processing_time,
623
+ model_name=current_profile.display_name,
624
+ model_id=model_id or current_profile.get_model_info().get("model_name"),
625
+ image_processed=bool(pil_image))
 
 
 
 
 
626
  except Exception as e:
627
+ logger.error(f"โŒ ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ์ƒ์„ฑ ์˜ค๋ฅ˜: {e}")
 
628
  raise HTTPException(status_code=500, detail=f"๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ์ƒ์„ฑ ์‹คํŒจ: {str(e)}")
629
 
630
+
631
+
632
+
633
  @app.get("/models")
634
  async def list_models():
635
  """์‚ฌ์šฉ ๊ฐ€๋Šฅํ•œ ๋ชจ๋ธ ๋ชฉ๋ก"""
 
1944
  logger.error(f"์ƒํƒœ ํ™•์ธ ์˜ค๋ฅ˜: {e}")
1945
  return {"status": "error", "error": str(e)}
1946
 
1947
+ # ============================================================================
1948
+ # ์ปจํ…์ŠคํŠธ ๊ด€๋ฆฌ ์‹œ์Šคํ…œ ์—”๋“œํฌ์ธํŠธ
1949
+ # ============================================================================
1950
+
1951
+ @app.post("/context/set-system-prompt")
1952
+ async def set_system_prompt(prompt: str = Form(...)):
1953
+ """์‹œ์Šคํ…œ ํ”„๋กฌํ”„ํŠธ ์„ค์ •"""
1954
+ try:
1955
+ context_manager.set_system_prompt(prompt)
1956
+ return {
1957
+ "success": True,
1958
+ "message": "์‹œ์Šคํ…œ ํ”„๋กฌํ”„ํŠธ๊ฐ€ ์„ค์ •๋˜์—ˆ์Šต๋‹ˆ๋‹ค.",
1959
+ "prompt_length": len(prompt)
1960
+ }
1961
+ except Exception as e:
1962
+ logger.error(f"โŒ ์‹œ์Šคํ…œ ํ”„๋กฌํ”„ํŠธ ์„ค์ • ์‹คํŒจ: {e}")
1963
+ return {"success": False, "error": str(e)}
1964
+
1965
+ @app.post("/context/add-message")
1966
+ async def add_context_message(
1967
+ role: str = Form(...), # 'user' ๋˜๋Š” 'assistant'
1968
+ content: str = Form(...),
1969
+ message_id: str = Form(None),
1970
+ metadata: str = Form("{}") # JSON ๋ฌธ์ž์—ด
1971
+ ):
1972
+ """์ปจํ…์ŠคํŠธ์— ๋ฉ”์‹œ์ง€ ์ถ”๊ฐ€"""
1973
+ try:
1974
+ import json
1975
+ metadata_dict = json.loads(metadata) if metadata else {}
1976
+
1977
+ if role == "user":
1978
+ msg_id = context_manager.add_user_message(content, message_id, metadata_dict)
1979
+ elif role == "assistant":
1980
+ msg_id = context_manager.add_assistant_message(content, message_id, metadata_dict)
1981
+ else:
1982
+ return {"success": False, "error": "์ž˜๋ชป๋œ ์—ญํ• ์ž…๋‹ˆ๋‹ค. 'user' ๋˜๋Š” 'assistant'๋ฅผ ์‚ฌ์šฉํ•˜์„ธ์š”."}
1983
+
1984
+ return {
1985
+ "success": True,
1986
+ "message": "๋ฉ”์‹œ์ง€๊ฐ€ ์ปจํ…์ŠคํŠธ์— ์ถ”๊ฐ€๋˜์—ˆ์Šต๋‹ˆ๋‹ค.",
1987
+ "message_id": msg_id,
1988
+ "context_summary": context_manager.get_context_summary()
1989
+ }
1990
+ except Exception as e:
1991
+ logger.error(f"โŒ ์ปจํ…์ŠคํŠธ ๋ฉ”์‹œ์ง€ ์ถ”๊ฐ€ ์‹คํŒจ: {e}")
1992
+ return {"success": False, "error": str(e)}
1993
+
1994
+ @app.get("/context/get")
1995
+ async def get_context(
1996
+ include_system: bool = True,
1997
+ max_length: Optional[int] = None,
1998
+ recent_turns: Optional[int] = None
1999
+ ):
2000
+ """ํ˜„์žฌ ์ปจํ…์ŠคํŠธ ์กฐํšŒ"""
2001
+ try:
2002
+ if recent_turns:
2003
+ context = context_manager.get_recent_context(recent_turns)
2004
+ else:
2005
+ context = context_manager.get_context(include_system, max_length)
2006
+
2007
+ return {
2008
+ "success": True,
2009
+ "context": context,
2010
+ "context_summary": context_manager.get_context_summary(),
2011
+ "memory_efficiency": context_manager.get_memory_efficiency()
2012
+ }
2013
+ except Exception as e:
2014
+ logger.error(f"โŒ ์ปจํ…์ŠคํŠธ ์กฐํšŒ ์‹คํŒจ: {e}")
2015
+ return {"success": False, "error": str(e)}
2016
+
2017
+ @app.get("/context/summary")
2018
+ async def get_context_summary():
2019
+ """์ปจํ…์ŠคํŠธ ์š”์•ฝ ์ •๋ณด ์กฐํšŒ"""
2020
+ try:
2021
+ return {
2022
+ "success": True,
2023
+ "summary": context_manager.get_context_summary(),
2024
+ "memory_efficiency": context_manager.get_memory_efficiency()
2025
+ }
2026
+ except Exception as e:
2027
+ logger.error(f"โŒ ์ปจํ…์ŠคํŠธ ์š”์•ฝ ์กฐํšŒ ์‹คํŒจ: {e}")
2028
+ return {"success": False, "error": str(e)}
2029
+
2030
+ @app.post("/context/clear")
2031
+ async def clear_context():
2032
+ """์ปจํ…์ŠคํŠธ ์ดˆ๊ธฐํ™”"""
2033
+ try:
2034
+ context_manager.clear_context()
2035
+ return {
2036
+ "success": True,
2037
+ "message": "์ปจํ…์ŠคํŠธ๊ฐ€ ์ดˆ๊ธฐํ™”๋˜์—ˆ์Šต๋‹ˆ๋‹ค."
2038
+ }
2039
+ except Exception as e:
2040
+ logger.error(f"โŒ ์ปจํ…์ŠคํŠธ ์ดˆ๊ธฐํ™” ์‹คํŒจ: {e}")
2041
+ return {"success": False, "error": str(e)}
2042
+
2043
+ @app.delete("/context/message/{message_id}")
2044
+ async def remove_context_message(message_id: str):
2045
+ """์ปจํ…์ŠคํŠธ์—์„œ ํŠน์ • ๋ฉ”์‹œ์ง€ ์ œ๊ฑฐ"""
2046
+ try:
2047
+ success = context_manager.remove_message(message_id)
2048
+ if success:
2049
+ return {
2050
+ "success": True,
2051
+ "message": "๋ฉ”์‹œ์ง€๊ฐ€ ์ œ๊ฑฐ๋˜์—ˆ์Šต๋‹ˆ๋‹ค.",
2052
+ "context_summary": context_manager.get_context_summary()
2053
+ }
2054
+ else:
2055
+ return {"success": False, "error": "๋ฉ”์‹œ์ง€๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."}
2056
+ except Exception as e:
2057
+ logger.error(f"โŒ ๋ฉ”์‹œ์ง€ ์ œ๊ฑฐ ์‹คํŒจ: {e}")
2058
+ return {"success": False, "error": str(e)}
2059
+
2060
+ @app.put("/context/message/{message_id}")
2061
+ async def edit_context_message(
2062
+ message_id: str,
2063
+ new_content: str = Form(...)
2064
+ ):
2065
+ """์ปจํ…์ŠคํŠธ ๋ฉ”์‹œ์ง€ ์ˆ˜์ •"""
2066
+ try:
2067
+ success = context_manager.edit_message(message_id, new_content)
2068
+ if success:
2069
+ return {
2070
+ "success": True,
2071
+ "message": "๋ฉ”์‹œ์ง€๊ฐ€ ์ˆ˜์ •๋˜์—ˆ์Šต๋‹ˆ๋‹ค.",
2072
+ "context_summary": context_manager.get_context_summary()
2073
+ }
2074
+ else:
2075
+ return {"success": False, "error": "๋ฉ”์‹œ์ง€๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."}
2076
+ except Exception as e:
2077
+ logger.error(f"โŒ ๋ฉ”์‹œ์ง€ ์ˆ˜์ • ์‹คํŒจ: {e}")
2078
+ return {"success": False, "error": str(e)}
2079
+
2080
+ @app.get("/context/search")
2081
+ async def search_context(query: str, max_results: int = 5):
2082
+ """์ปจํ…์ŠคํŠธ ๋‚ด์—์„œ ๊ฒ€์ƒ‰"""
2083
+ try:
2084
+ results = context_manager.search_context(query, max_results)
2085
+ return {
2086
+ "success": True,
2087
+ "query": query,
2088
+ "results": results,
2089
+ "total_results": len(results)
2090
+ }
2091
+ except Exception as e:
2092
+ logger.error(f"โŒ ์ปจํ…์ŠคํŠธ ๊ฒ€์ƒ‰ ์‹คํŒจ: {e}")
2093
+ return {"success": False, "error": str(e)}
2094
+
2095
+ @app.post("/context/export")
2096
+ async def export_context(file_path: str = Form(None)):
2097
+ """์ปจํ…์ŠคํŠธ ๋‚ด๋ณด๋‚ด๊ธฐ"""
2098
+ try:
2099
+ exported_path = context_manager.export_context(file_path)
2100
+ return {
2101
+ "success": True,
2102
+ "message": "์ปจํ…์ŠคํŠธ๊ฐ€ ๋‚ด๋ณด๋‚ด์กŒ์Šต๋‹ˆ๋‹ค.",
2103
+ "file_path": exported_path
2104
+ }
2105
+ except Exception as e:
2106
+ logger.error(f"โŒ ์ปจํ…์ŠคํŠธ ๋‚ด๋ณด๋‚ด๊ธฐ ์‹คํŒจ: {e}")
2107
+ return {"success": False, "error": str(e)}
2108
+
2109
+ @app.post("/context/import")
2110
+ async def import_context(file_path: str = Form(...)):
2111
+ """์ปจํ…์ŠคํŠธ ๊ฐ€์ ธ์˜ค๊ธฐ"""
2112
+ try:
2113
+ success = context_manager.import_context(file_path)
2114
+ if success:
2115
+ return {
2116
+ "success": True,
2117
+ "message": "์ปจํ…์ŠคํŠธ๊ฐ€ ๊ฐ€์ ธ์™€์กŒ์Šต๋‹ˆ๋‹ค.",
2118
+ "context_summary": context_manager.get_context_summary()
2119
+ }
2120
+ else:
2121
+ return {"success": False, "error": "์ปจํ…์ŠคํŠธ ๊ฐ€์ ธ์˜ค๊ธฐ์— ์‹คํŒจํ–ˆ์Šต๋‹ˆ๋‹ค."}
2122
+ except Exception as e:
2123
+ logger.error(f"โŒ ์ปจํ…์ŠคํŠธ ๊ฐ€์ ธ์˜ค๊ธฐ ์‹คํŒจ: {e}")
2124
+ return {"success": False, "error": str(e)}
2125
+
2126
+ # ============================================================================
2127
+ # LoRA/QLoRA ๊ด€๋ฆฌ ์‹œ์Šคํ…œ ์—”๋“œํฌ์ธํŠธ
2128
+ # ============================================================================
2129
+
2130
+ @app.post("/lora/load-base-model")
2131
+ async def load_lora_base_model(
2132
+ model_path: str = Form(...),
2133
+ model_type: str = Form("causal_lm")
2134
+ ):
2135
+ """LoRA ๊ธฐ๋ณธ ๋ชจ๋ธ ๋กœ๋“œ"""
2136
+ if not LORA_AVAILABLE or lora_manager is None:
2137
+ return {
2138
+ "success": False,
2139
+ "error": "LoRA ๊ธฐ๋Šฅ์ด ์‚ฌ์šฉ ๋ถˆ๊ฐ€๋Šฅํ•ฉ๋‹ˆ๋‹ค. PEFT ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ๊ฐ€ ์„ค์น˜๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค."
2140
+ }
2141
+
2142
+ try:
2143
+ success = lora_manager.load_base_model(model_path, model_type)
2144
+ if success:
2145
+ return {
2146
+ "success": True,
2147
+ "message": "๊ธฐ๋ณธ ๋ชจ๋ธ์ด ๋กœ๋“œ๋˜์—ˆ์Šต๋‹ˆ๋‹ค.",
2148
+ "model_path": model_path,
2149
+ "device": lora_manager.device
2150
+ }
2151
+ else:
2152
+ return {"success": False, "error": "๋ชจ๋ธ ๋กœ๋“œ์— ์‹คํŒจํ–ˆ์Šต๋‹ˆ๋‹ค."}
2153
+ except Exception as e:
2154
+ logger.error(f"โŒ LoRA ๊ธฐ๋ณธ ๋ชจ๋ธ ๋กœ๋“œ ์‹คํŒจ: {e}")
2155
+ return {"success": False, "error": str(e)}
2156
+
2157
+ @app.post("/lora/create-config")
2158
+ async def create_lora_config(
2159
+ r: int = Form(16),
2160
+ lora_alpha: int = Form(32),
2161
+ target_modules: str = Form("q_proj,v_proj,k_proj,o_proj,gate_proj,up_proj,down_proj"),
2162
+ lora_dropout: float = Form(0.1),
2163
+ bias: str = Form("none"),
2164
+ task_type: str = Form("CAUSAL_LM")
2165
+ ):
2166
+ """LoRA ์„ค์ • ์ƒ์„ฑ"""
2167
+ if not LORA_AVAILABLE or lora_manager is None:
2168
+ return {
2169
+ "success": False,
2170
+ "error": "LoRA ๊ธฐ๋Šฅ์ด ์‚ฌ์šฉ ๋ถˆ๊ฐ€๋Šฅํ•ฉ๋‹ˆ๋‹ค. PEFT ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ๊ฐ€ ์„ค์น˜๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค."
2171
+ }
2172
+
2173
+ try:
2174
+ # target_modules๋ฅผ ๋ฆฌ์ŠคํŠธ๋กœ ๋ณ€ํ™˜
2175
+ target_modules_list = target_modules.split(",") if target_modules else None
2176
+
2177
+ config = lora_manager.create_lora_config(
2178
+ r=r,
2179
+ lora_alpha=lora_alpha,
2180
+ target_modules=target_modules_list,
2181
+ lora_dropout=lora_dropout,
2182
+ bias=bias,
2183
+ task_type=task_type
2184
+ )
2185
+
2186
+ return {
2187
+ "success": True,
2188
+ "message": "LoRA ์„ค์ •์ด ์ƒ์„ฑ๋˜์—ˆ์Šต๋‹ˆ๋‹ค.",
2189
+ "config": config.to_dict()
2190
+ }
2191
+ except Exception as e:
2192
+ logger.error(f"โŒ LoRA ์„ค์ • ์ƒ์„ฑ ์‹คํŒจ: {e}")
2193
+ return {"success": False, "error": str(e)}
2194
+
2195
+ @app.post("/lora/apply")
2196
+ async def apply_lora_adapter(adapter_name: str = Form("default")):
2197
+ """LoRA ์–ด๋Œ‘ํ„ฐ๋ฅผ ๋ชจ๋ธ์— ์ ์šฉ"""
2198
+ if not LORA_AVAILABLE or lora_manager is None:
2199
+ return {
2200
+ "success": False,
2201
+ "error": "LoRA ๊ธฐ๋Šฅ์ด ์‚ฌ์šฉ ๋ถˆ๊ฐ€๋Šฅํ•ฉ๋‹ˆ๋‹ค. PEFT ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ๊ฐ€ ์„ค์น˜๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค."
2202
+ }
2203
+
2204
+ try:
2205
+ success = lora_manager.apply_lora_to_model(adapter_name)
2206
+ if success:
2207
+ return {
2208
+ "success": True,
2209
+ "message": "LoRA ์–ด๋Œ‘ํ„ฐ๊ฐ€ ์ ์šฉ๋˜์—ˆ์Šต๋‹ˆ๋‹ค.",
2210
+ "adapter_name": adapter_name,
2211
+ "stats": lora_manager.get_adapter_stats()
2212
+ }
2213
+ else:
2214
+ return {"success": False, "error": "LoRA ์–ด๋Œ‘ํ„ฐ ์ ์šฉ์— ์‹คํŒจํ–ˆ์Šต๋‹ˆ๋‹ค."}
2215
+ except Exception as e:
2216
+ logger.error(f"โŒ LoRA ์–ด๋Œ‘ํ„ฐ ์ ์šฉ ์‹คํŒจ: {e}")
2217
+ return {"success": False, "error": str(e)}
2218
+
2219
+ @app.post("/lora/load-adapter")
2220
+ async def load_lora_adapter(
2221
+ adapter_path: str = Form(...),
2222
+ adapter_name: str = Form(None)
2223
+ ):
2224
+ """์ €์žฅ๋œ LoRA ์–ด๋Œ‘ํ„ฐ ๋กœ๋“œ"""
2225
+ if not LORA_AVAILABLE or lora_manager is None:
2226
+ return {
2227
+ "success": False,
2228
+ "error": "LoRA ๊ธฐ๋Šฅ์ด ์‚ฌ์šฉ ๋ถˆ๊ฐ€๋Šฅํ•ฉ๋‹ˆ๋‹ค. PEFT ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ๊ฐ€ ์„ค์น˜๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค."
2229
+ }
2230
+
2231
+ try:
2232
+ success = lora_manager.load_lora_adapter(adapter_path, adapter_name)
2233
+ if success:
2234
+ return {
2235
+ "success": True,
2236
+ "message": "LoRA ์–ด๋Œ‘ํ„ฐ๊ฐ€ ๋กœ๋“œ๋˜์—ˆ์Šต๋‹ˆ๋‹ค.",
2237
+ "adapter_name": lora_manager.current_adapter_name,
2238
+ "stats": lora_manager.get_adapter_stats()
2239
+ }
2240
+ else:
2241
+ return {"success": False, "error": "LoRA ์–ด๋Œ‘ํ„ฐ ๋กœ๋“œ์— ์‹คํŒจํ–ˆ์Šต๋‹ˆ๋‹ค."}
2242
+ except Exception as e:
2243
+ logger.error(f"โŒ LoRA ์–ด๋Œ‘ํ„ฐ ๋กœ๋“œ ์‹คํŒจ: {e}")
2244
+ return {"success": False, "error": str(e)}
2245
+
2246
+ @app.post("/lora/save-adapter")
2247
+ async def save_lora_adapter(
2248
+ adapter_name: str = Form(None),
2249
+ output_dir: str = Form(None)
2250
+ ):
2251
+ """LoRA ์–ด๋Œ‘ํ„ฐ ์ €์žฅ"""
2252
+ if not LORA_AVAILABLE or lora_manager is None:
2253
+ return {
2254
+ "success": False,
2255
+ "error": "LoRA ๊ธฐ๋Šฅ์ด ์‚ฌ์šฉ ๋ถˆ๊ฐ€๋Šฅํ•ฉ๋‹ˆ๋‹ค. PEFT ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ๊ฐ€ ์„ค์น˜๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค."
2256
+ }
2257
+
2258
+ try:
2259
+ success = lora_manager.save_lora_adapter(adapter_name, output_dir)
2260
+ if success:
2261
+ return {
2262
+ "success": True,
2263
+ "message": "LoRA ์–ด๋Œ‘ํ„ฐ๊ฐ€ ์ €์žฅ๋˜์—ˆ์Šต๋‹ˆ๋‹ค.",
2264
+ "adapter_name": lora_manager.current_adapter_name
2265
+ }
2266
+ else:
2267
+ return {"success": False, "error": "LoRA ์–ด๋Œ‘ํ„ฐ ์ €์žฅ์— ์‹คํŒจํ–ˆ์Šต๋‹ˆ๋‹ค."}
2268
+ except Exception as e:
2269
+ logger.error(f"โŒ LoRA ์–ด๋Œ‘ํ„ฐ ์ €์žฅ ์‹คํŒจ: {e}")
2270
+ return {"success": False, "error": str(e)}
2271
+
2272
+ @app.get("/lora/adapters")
2273
+ async def list_lora_adapters():
2274
+ """์‚ฌ์šฉ ๊ฐ€๋Šฅํ•œ LoRA ์–ด๋Œ‘ํ„ฐ ๋ชฉ๋ก"""
2275
+ if not LORA_AVAILABLE or lora_manager is None:
2276
+ return {
2277
+ "success": False,
2278
+ "error": "LoRA ๊ธฐ๋Šฅ์ด ์‚ฌ์šฉ ๋ถˆ๊ฐ€๋Šฅํ•ฉ๋‹ˆ๋‹ค. PEFT ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ๊ฐ€ ์„ค์น˜๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค."
2279
+ }
2280
+
2281
+ try:
2282
+ adapters = lora_manager.list_available_adapters()
2283
+ return {
2284
+ "success": True,
2285
+ "adapters": adapters
2286
+ }
2287
+ except Exception as e:
2288
+ logger.error(f"โŒ LoRA ์–ด๋Œ‘ํ„ฐ ๋ชฉ๋ก ์กฐํšŒ ์‹คํŒจ: {e}")
2289
+ return {"success": False, "error": str(e)}
2290
+
2291
+ @app.get("/lora/stats")
2292
+ async def get_lora_stats():
2293
+ """ํ˜„์žฌ LoRA ์–ด๋Œ‘ํ„ฐ ํ†ต๊ณ„"""
2294
+ if not LORA_AVAILABLE or lora_manager is None:
2295
+ return {
2296
+ "success": False,
2297
+ "error": "LoRA ๊ธฐ๋Šฅ์ด ์‚ฌ์šฉ ๋ถˆ๊ฐ€๋Šฅํ•ฉ๋‹ˆ๋‹ค. PEFT ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ๊ฐ€ ์„ค์น˜๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค."
2298
+ }
2299
+
2300
+ try:
2301
+ stats = lora_manager.get_adapter_stats()
2302
+ return {
2303
+ "success": True,
2304
+ "stats": stats
2305
+ }
2306
+ except Exception as e:
2307
+ logger.error(f"โŒ LoRA ํ†ต๊ณ„ ์กฐํšŒ ์‹คํŒจ: {e}")
2308
+ return {"success": False, "error": str(e)}
2309
+
2310
+ @app.post("/lora/switch")
2311
+ async def switch_lora_adapter(adapter_name: str = Form(...)):
2312
+ """LoRA ์–ด๋Œ‘ํ„ฐ ์ „ํ™˜"""
2313
+ if not LORA_AVAILABLE or lora_manager is None:
2314
+ return {
2315
+ "success": False,
2316
+ "error": "LoRA ๊ธฐ๋Šฅ์ด ์‚ฌ์šฉ ๋ถˆ๊ฐ€๋Šฅํ•ฉ๋‹ˆ๋‹ค. PEFT ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ๊ฐ€ ์„ค์น˜๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค."
2317
+ }
2318
+
2319
+ try:
2320
+ success = lora_manager.switch_adapter(adapter_name)
2321
+ if success:
2322
+ return {
2323
+ "success": True,
2324
+ "message": f"LoRA ์–ด๋Œ‘ํ„ฐ๊ฐ€ {adapter_name}์œผ๋กœ ์ „ํ™˜๋˜์—ˆ์Šต๋‹ˆ๋‹ค.",
2325
+ "adapter_name": adapter_name,
2326
+ "stats": lora_manager.get_adapter_stats()
2327
+ }
2328
+ else:
2329
+ return {"success": False, "error": "LoRA ์–ด๋Œ‘ํ„ฐ ์ „ํ™˜์— ์‹คํŒจํ–ˆ์Šต๋‹ˆ๋‹ค."}
2330
+ except Exception as e:
2331
+ logger.error(f"โŒ LoRA ์–ด๋Œ‘ํ„ฐ ์ „ํ™˜ ์‹คํŒจ: {e}")
2332
+ return {"success": False, "error": str(e)}
2333
+
2334
+ @app.post("/lora/unload")
2335
+ async def unload_lora_adapter():
2336
+ """LoRA ์–ด๋Œ‘ํ„ฐ ์–ธ๋กœ๋“œ"""
2337
+ if not LORA_AVAILABLE or lora_manager is None:
2338
+ return {
2339
+ "success": False,
2340
+ "error": "LoRA ๊ธฐ๋Šฅ์ด ์‚ฌ์šฉ ๋ถˆ๊ฐ€๋Šฅํ•ฉ๋‹ˆ๋‹ค. PEFT ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ๊ฐ€ ์„ค์น˜๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค."
2341
+ }
2342
+
2343
+ try:
2344
+ success = lora_manager.unload_adapter()
2345
+ if success:
2346
+ return {
2347
+ "success": True,
2348
+ "message": "LoRA ์–ด๋Œ‘ํ„ฐ๊ฐ€ ์–ธ๋กœ๋“œ๋˜์—ˆ์Šต๋‹ˆ๋‹ค."
2349
+ }
2350
+ else:
2351
+ return {"success": False, "error": "LoRA ์–ด๋Œ‘ํ„ฐ ์–ธ๋กœ๋“œ์— ์‹คํŒจํ–ˆ์Šต๋‹ˆ๋‹ค."}
2352
+ except Exception as e:
2353
+ logger.error(f"โŒ LoRA ์–ด๋Œ‘ํ„ฐ ์–ธ๋กœ๋“œ ์‹คํŒจ: {e}")
2354
+ return {"success": False, "error": str(e)}
2355
+
2356
+ @app.post("/lora/generate")
2357
+ async def generate_with_lora(
2358
+ prompt: str = Form(...),
2359
+ max_length: int = Form(100),
2360
+ temperature: float = Form(0.7)
2361
+ ):
2362
+ """LoRA ๋ชจ๋ธ์„ ์‚ฌ์šฉํ•œ ํ…์ŠคํŠธ ์ƒ์„ฑ"""
2363
+ if not LORA_AVAILABLE or lora_manager is None:
2364
+ return {
2365
+ "success": False,
2366
+ "error": "LoRA ๊ธฐ๋Šฅ์ด ์‚ฌ์šฉ ๋ถˆ๊ฐ€๋Šฅํ•ฉ๋‹ˆ๋‹ค. PEFT ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ๊ฐ€ ์„ค์น˜๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค."
2367
+ }
2368
+
2369
+ try:
2370
+ response = lora_manager.generate_text(prompt, max_length, temperature)
2371
+ return {
2372
+ "success": True,
2373
+ "response": response,
2374
+ "adapter_name": lora_manager.current_adapter_name
2375
+ }
2376
+ except Exception as e:
2377
+ logger.error(f"โŒ LoRA ํ…์ŠคํŠธ ์ƒ์„ฑ ์‹คํŒจ: {e}")
2378
+ return {"success": False, "error": str(e)}
2379
+
2380
+ @app.post("/lora/merge")
2381
+ async def merge_lora_with_base(output_path: str = Form(None)):
2382
+ """LoRA ์–ด๋Œ‘ํ„ฐ๋ฅผ ๊ธฐ๋ณธ ๋ชจ๋ธ๊ณผ ๋ณ‘ํ•ฉ"""
2383
+ if not LORA_AVAILABLE or lora_manager is None:
2384
+ return {
2385
+ "success": False,
2386
+ "error": "LoRA ๊ธฐ๋Šฅ์ด ์‚ฌ์šฉ ๋ถˆ๊ฐ€๋Šฅํ•ฉ๋‹ˆ๋‹ค. PEFT ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ๊ฐ€ ์„ค์น˜๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค."
2387
+ }
2388
+
2389
+ try:
2390
+ success = lora_manager.merge_lora_with_base(output_path)
2391
+ if success:
2392
+ return {
2393
+ "success": True,
2394
+ "message": "LoRA ์–ด๋Œ‘ํ„ฐ๊ฐ€ ๊ธฐ๋ณธ ๋ชจ๋ธ๊ณผ ๋ณ‘ํ•ฉ๋˜์—ˆ์Šต๋‹ˆ๋‹ค.",
2395
+ "output_path": output_path or f"{lora_manager.base_model_path}_merged"
2396
+ }
2397
+ else:
2398
+ return {"success": False, "error": "LoRA ์–ด๋Œ‘ํ„ฐ ๋ณ‘ํ•ฉ์— ์‹คํŒจํ–ˆ์Šต๋‹ˆ๋‹ค."}
2399
+ except Exception as e:
2400
+ logger.error(f"โŒ LoRA ์–ด๋Œ‘ํ„ฐ ๋ณ‘ํ•ฉ ์‹คํŒจ: {e}")
2401
+ return {"success": False, "error": str(e)}
2402
+
2403
  # ============================================================================
2404
  # ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ RAG ์‹œ์Šคํ…œ ์—”๋“œํฌ์ธํŠธ
2405
  # ============================================================================
 
2462
  use_text: bool = Form(True),
2463
  use_image: bool = Form(True),
2464
  use_latex: bool = Form(True),
2465
+ use_latex_ocr: bool = Form(False), # LaTeX-OCR ๊ธฐ๋Šฅ์ด ๋น„ํ™œ์„ฑํ™”๋จ
2466
  max_length: Optional[int] = Form(None),
2467
  temperature: Optional[float] = Form(None),
2468
  top_p: Optional[float] = Form(None),
 
2514
  "text_rag_available": True,
2515
  "image_rag_available": True,
2516
  "latex_rag_available": True,
2517
+ "latex_ocr_faiss_available": False, # LaTeX-OCR ๊ธฐ๋Šฅ์ด ๋น„ํ™œ์„ฑํ™”๋จ
2518
  "status": "ready"
2519
  }
2520
  except Exception as e:
lily_llm_api/app_v2_250819_1305.py ADDED
The diff for this file is too large to render. See raw diff
 
lily_llm_api/app_v2_250819_1828.py ADDED
The diff for this file is too large to render. See raw diff
 
lily_llm_api/app_v2_250819_2008.py ADDED
The diff for this file is too large to render. See raw diff
 
lily_llm_api/models/{configuration.py โ†’ back/configuration.py} RENAMED
File without changes
lily_llm_api/models/{modeling.py โ†’ back/modeling.py} RENAMED
File without changes
lily_llm_api/models/kanana_1_5_v_3b_instruct.py CHANGED
@@ -9,6 +9,8 @@ import logging
9
  from transformers import AutoTokenizer
10
  import os
11
  from dotenv import load_dotenv
 
 
12
  load_dotenv()
13
 
14
  HF_TOKEN = os.getenv("HF_TOKEN")
@@ -95,113 +97,66 @@ class Kanana15V3bInstructProfile:
95
  logger.error(f"โŒ ํ™˜๊ฒฝ๋ณ€์ˆ˜ ๋กœ๋“œ ์‹คํŒจ: {e}")
96
 
97
  def load_model(self) -> Tuple[Any, Any]:
98
- """ํ™˜๊ฒฝ์— ๋”ฐ๋ผ ๋ชจ๋ธ์„ ๋กœ๋“œํ•ฉ๋‹ˆ๋‹ค."""
99
- logger.info(f"๐Ÿ“ฅ {self.display_name} ๋ชจ๋ธ ๋กœ๋“œ ์ค‘...")
100
 
101
- import os
102
  from pathlib import Path
 
 
 
 
103
 
104
- # ํ™˜๊ฒฝ๋ณ€์ˆ˜ ๋กœ๋”ฉ
105
- self._load_environment_variables()
 
106
 
107
  try:
108
- # 1. ๋กœ์ปฌ ์บ์‹œ ๊ฒฝ๋กœ๊ฐ€ ์žˆ๋Š”์ง€ ํ™•์ธ
109
- use_local = False
110
- if self.local_path is not None:
111
- local_model_path = Path(self.local_path)
112
- use_local = local_model_path.exists() and any(local_model_path.iterdir())
113
-
114
- if use_local:
115
- logger.info(f"๐Ÿ—‚๏ธ ๋กœ์ปฌ ๋ชจ๋ธ ์‚ฌ์šฉ: {self.local_path}")
116
- model_path = self.local_path
117
- local_files_only = True
118
-
119
- # ๋กœ์ปฌ ๋ชจ๋ธ์˜ ๊ฒฝ์šฐ sys.path์— ์ถ”๊ฐ€
120
- if self.local_path not in sys.path:
121
- sys.path.insert(0, self.local_path)
122
- else:
123
- logger.info(f"๐ŸŒ Hugging Face Hub์—์„œ ๋‹ค์šด๋กœ๋“œ: {self.model_name}")
124
- model_path = self.model_name
125
- local_files_only = False
126
-
127
- # ํ™˜๊ฒฝ๋ณ„ ์ถ”๊ฐ€ ์„ค์ •
128
- if self.is_local:
129
- logger.info("๐Ÿ  ๋กœ์ปฌ ํ™˜๊ฒฝ ์„ค์ • ์ ์šฉ")
130
- # ๋กœ์ปฌ ํ™˜๊ฒฝ์—์„œ๋Š” ์ถ”๊ฐ€ ์„ค์ •์ด ํ•„์š”ํ•  ์ˆ˜ ์žˆ์Œ
131
- else:
132
- logger.info("โ˜๏ธ ์„œ๋ฒ„ ํ™˜๊ฒฝ ์„ค์ • ์ ์šฉ")
133
- # ์„œ๋ฒ„ ํ™˜๊ฒฝ์—์„œ๋Š” ์บ์‹œ ๋””๋ ‰ํ† ๋ฆฌ ๋“ฑ ์„ค์ •
134
 
135
- # DEBUG: ๋ชจ๋ธ ๊ฒฝ๋กœ์™€ ์„ค์ • ์ถœ๋ ฅ
136
- logger.info(f"๐Ÿ” DEBUG: model_path = {model_path}")
137
- logger.info(f"๐Ÿ” DEBUG: local_files_only = {local_files_only}")
138
- logger.info(f"๐Ÿ” DEBUG: HF_TOKEN = {'์žˆ์Œ' if HF_TOKEN else '์—†์Œ'}")
139
- logger.info(f"๐Ÿ” DEBUG: use_local = {use_local}")
140
- logger.info(f"๐Ÿ” DEBUG: is_local = {self.is_local}")
141
-
142
- # 2. ํ† ํฌ๋‚˜์ด์ € ๋กœ๋“œ
143
- logger.info(f"๐Ÿ” DEBUG: ํ† ํฌ๋‚˜์ด์ € ๋กœ๋“œ ์‹œ์ž‘ - model_path={model_path}")
144
- tokenizer = AutoTokenizer.from_pretrained(
145
  model_path,
146
- token=HF_TOKEN,
147
  trust_remote_code=True,
148
- local_files_only=local_files_only,
149
- cache_dir="/app/cache/transformers" if not use_local else None
150
  )
151
- logger.info(f"โœ… ํ† ํฌ๋‚˜์ด์ € ๋กœ๋“œ ์™„๋ฃŒ ({tokenizer.__class__.__name__})")
152
-
153
- # 3. ๋ชจ๋ธ ๋กœ๋“œ
154
- logger.info(f"๐Ÿ” DEBUG: ๋ชจ๋ธ ๋กœ๋“œ ์‹œ์ž‘ - use_local={use_local}")
155
- from modeling import KananaVForConditionalGeneration
156
-
157
- if use_local:
158
- # ๋กœ์ปฌ ๋ชจ๋ธ: ์ปค์Šคํ…€ ๋ชจ๋ธ๋ง ํด๋ž˜์Šค ์‚ฌ์šฉ
159
- logger.info("๐Ÿ” DEBUG: ๋กœ์ปฌ ๋ชจ๋ธ ๋กœ๋“œ ์‹œ๋„")
160
- selected_dtype = torch.float16 if DEVICE == "cuda" else torch.float16
161
- logger.info(f"๐Ÿ” DEBUG: selected_dtype = {selected_dtype}")
162
- model = KananaVForConditionalGeneration.from_pretrained(
163
- model_path,
164
- token=HF_TOKEN,
165
- trust_remote_code=True,
166
- torch_dtype=selected_dtype,
167
- local_files_only=True,
168
- # low_cpu_mem_usage=True,
169
- ).to(DEVICE)
170
- else:
171
- # ์„œ๋ฒ„ ํ™˜๊ฒฝ: KananaVForConditionalGeneration ์ง์ ‘ ์‚ฌ์šฉ
172
- logger.info("๐Ÿ” DEBUG: ์„œ๋ฒ„ ๋ชจ๋ธ ๋กœ๋“œ ์‹œ๋„")
173
- logger.info("๏ฟฝ๏ฟฝ๏ฟฝ๏ฟฝ DEBUG: modeling ๋ชจ๋“ˆ import ์‹œ๋„")
174
- try:
175
- logger.info("๐Ÿ” DEBUG: modeling ๋ชจ๋“ˆ import ์„ฑ๊ณต")
176
- except ImportError as e:
177
- logger.error(f"๐Ÿ” DEBUG: modeling ๋ชจ๋“ˆ import ์‹คํŒจ - {e}")
178
- raise
179
-
180
- logger.info("๐Ÿ” DEBUG: KananaVForConditionalGeneration.from_pretrained ํ˜ธ์ถœ")
181
- # CPU ํ™˜๊ฒฝ์—์„œ float16/bfloat16๋ณด๋‹ค float32๊ฐ€ ๋” ์•ˆ์ •์ ์ธ ๊ฒฝ์šฐ๊ฐ€ ๋งŽ์Œ
182
- selected_dtype = torch.float16 if DEVICE == "cuda" else torch.float16
183
- model = KananaVForConditionalGeneration.from_pretrained(
184
- model_path,
185
- token=HF_TOKEN,
186
- torch_dtype=selected_dtype,
187
- trust_remote_code=True,
188
- cache_dir="/app/cache/transformers",
189
- # device_map="auto",
190
- # low_cpu_mem_usage=True,
191
- ).to(DEVICE)
192
 
193
- logger.info(f"โœ… ๋ชจ๋ธ ๋กœ๋“œ ์™„๋ฃŒ ({model.__class__.__name__})")
194
- return model, tokenizer
 
 
195
 
196
  except Exception as e:
197
- logger.error(f"โŒ {self.display_name} ๋ชจ๋ธ ๋กœ๋“œ ์‹คํŒจ: {e}", exc_info=True)
198
- if use_local and self.local_path in sys.path:
199
- sys.path.remove(self.local_path)
200
- raise
 
 
 
 
201
 
202
  def get_generation_config(self) -> Dict[str, Any]:
203
  # ๋ชจ๋ธ ํŒŒ๋ผ๋ฏธํ„ฐ ์ตœ์ ํ™” ์„ค์ •, max_new_tokens : ์ƒ์„ฑ๋˜๋Š” ํ…์ŠคํŠธ ๊ธธ์ด ์ตœ๋Œ€๊ฐ’ (์ด๋ฏธ์ง€ ์„ค๋ช…์„ ์œ„ํ•ด ์ฆ๊ฐ€)
204
- return {"max_new_tokens": 256, "temperature": 0.7, "do_sample": True, "top_k": 40, "top_p": 0.9, "repetition_penalty": 1.1}
205
 
206
  def extract_response(self, full_text: str, formatted_prompt: str = None, **kwargs) -> str:
207
  """
 
9
  from transformers import AutoTokenizer
10
  import os
11
  from dotenv import load_dotenv
12
+ from pathlib import Path
13
+
14
  load_dotenv()
15
 
16
  HF_TOKEN = os.getenv("HF_TOKEN")
 
97
  logger.error(f"โŒ ํ™˜๊ฒฝ๋ณ€์ˆ˜ ๋กœ๋“œ ์‹คํŒจ: {e}")
98
 
99
  def load_model(self) -> Tuple[Any, Any]:
100
+ """๋ชจ๋ธ ๋กœ๋“œ (๊ณต์‹์ ์ธ ๋ฐฉ๋ฒ• + ์ ˆ๋Œ€ ๊ฒฝ๋กœ sys.path ์ˆ˜์ • ์ตœ์ข…๋ณธ)"""
101
+ logger.info(f"๐Ÿ“ฅ {self.display_name} ๋ชจ๋ธ ๋กœ๋“œ ์ค‘ (๊ณต์‹ ๋ฐฉ๋ฒ•)...")
102
 
103
+ import sys
104
  from pathlib import Path
105
+ import os # <<< os ๋ชจ๋“ˆ ์ถ”๊ฐ€
106
+
107
+ # self.local_path๋ฅผ ์ ˆ๋Œ€ ๊ฒฝ๋กœ๋กœ ๋ณ€ํ™˜
108
+ absolute_model_path = os.path.abspath(self.local_path)
109
 
110
+ use_local = Path(absolute_model_path).exists() and any(Path(absolute_model_path).iterdir())
111
+ # model_path ๋ณ€์ˆ˜์— ์ ˆ๋Œ€ ๊ฒฝ๋กœ๋ฅผ ์‚ฌ์šฉ
112
+ model_path = absolute_model_path if use_local else self.model_name
113
 
114
  try:
115
+ from transformers import AutoModelForVision2Seq, AutoProcessor
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
 
117
+ logger.info(f"๐Ÿ” ๋ชจ๋ธ ๊ฒฝ๋กœ: {model_path} (local={'yes' if use_local else 'no'})")
118
+
119
+ if use_local and model_path not in sys.path:
120
+ logger.info(f" -> sys.path์— ์ž„์‹œ ๊ฒฝ๋กœ ์ถ”๊ฐ€: {model_path}")
121
+ sys.path.insert(0, model_path)
122
+
123
+ processor = AutoProcessor.from_pretrained(
 
 
 
124
  model_path,
 
125
  trust_remote_code=True,
126
+ local_files_only=use_local
 
127
  )
128
+
129
+ device = 'cuda' if torch.cuda.is_available() else 'cpu'
130
+ selected_dtype = torch.float16
131
+ # CPU bfloat16 ์ง€์› ์—ฌ๋ถ€ ์ฒดํฌ ๋กœ์ง ์ˆ˜์ •
132
+ if device == 'cpu' and not (hasattr(torch.backends, 'mkl') and torch.backends.mkl.is_available() and hasattr(torch, 'float16')):
133
+ selected_dtype = torch.float16
134
+
135
+ model = AutoModelForVision2Seq.from_pretrained(
136
+ model_path,
137
+ trust_remote_code=True,
138
+ torch_dtype=selected_dtype,
139
+ local_files_only=use_local,
140
+ ).to(device)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
141
 
142
+ model.eval()
143
+
144
+ logger.info(f"โœ… {self.display_name} ๋ชจ๋ธ ๋กœ๋“œ ์„ฑ๊ณต! (device={device}, dtype={selected_dtype})")
145
+ return model, processor
146
 
147
  except Exception as e:
148
+ logger.error(f"โŒ {self.display_name} ๋ชจ๋ธ ๋กœ๋“œ ์‹คํŒจ: {e}")
149
+ raise
150
+
151
+ finally:
152
+ if use_local and model_path in sys.path:
153
+ logger.info(f" -> sys.path์—์„œ ์ž„์‹œ ๊ฒฝ๋กœ ์ œ๊ฑฐ: {model_path}")
154
+ sys.path.remove(model_path)
155
+
156
 
157
  def get_generation_config(self) -> Dict[str, Any]:
158
  # ๋ชจ๋ธ ํŒŒ๋ผ๋ฏธํ„ฐ ์ตœ์ ํ™” ์„ค์ •, max_new_tokens : ์ƒ์„ฑ๋˜๋Š” ํ…์ŠคํŠธ ๊ธธ์ด ์ตœ๋Œ€๊ฐ’ (์ด๋ฏธ์ง€ ์„ค๋ช…์„ ์œ„ํ•ด ์ฆ๊ฐ€)
159
+ return {"max_new_tokens": 50, "temperature": 0.7, "do_sample": True, "top_k": 40, "top_p": 0.9, "repetition_penalty": 1.1}
160
 
161
  def extract_response(self, full_text: str, formatted_prompt: str = None, **kwargs) -> str:
162
  """
lily_llm_api/models/kanana_1_5_v_3b_instruct_250819_0915.py ADDED
@@ -0,0 +1,280 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Kanana-1.5-v-3b-instruct ๋ชจ๋ธ ํ”„๋กœํ•„ (๋‹จ์ˆœ ๋กœ๋”ฉ ์ตœ์ข…๋ณธ)
4
+ """
5
+ import sys
6
+ from typing import Dict, Any, Tuple
7
+ import torch
8
+ import logging
9
+ from transformers import AutoTokenizer
10
+ import os
11
+ from dotenv import load_dotenv
12
+ load_dotenv()
13
+
14
+ HF_TOKEN = os.getenv("HF_TOKEN")
15
+
16
+ logger = logging.getLogger(__name__)
17
+ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
18
+
19
+ class Kanana15V3bInstructProfile:
20
+ """Kanana-1.5-v-3b-instruct ๋ชจ๋ธ ํ”„๋กœํ•„"""
21
+
22
+ def __init__(self):
23
+ # ํ™˜๊ฒฝ ๊ฐ์ง€
24
+ self.is_local = self._detect_local_environment()
25
+
26
+ # ๋ชจ๋ธ ๊ฒฝ๋กœ ์„ค์ •
27
+ if self.is_local:
28
+ self.model_name = "gbrabbit/lily-math-model" # ๋กœ์ปฌ์—์„œ๋„ HF ๋ชจ๋ธ๋ช… ์‚ฌ์šฉ
29
+ self.local_path = "./lily_llm_core/models/kanana_1_5_v_3b_instruct"
30
+ self.display_name = "kanana-1.5-v-3b-instruct"
31
+ else:
32
+ self.model_name = "gbrabbit/lily-math-model" # Hugging Face Hub ๋ชจ๋ธ ๊ฒฝ๋กœ
33
+ self.local_path = None # ์„œ๋ฒ„์—์„œ๋Š” ๋กœ์ปฌ ๊ฒฝ๋กœ ์‚ฌ์šฉ ์•ˆํ•จ
34
+ self.display_name = "kanana-1.5-v-3b-instruct"
35
+
36
+ self.description = "์นด์นด์˜ค ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ๋ชจ๋ธ (3.6B) - Math RAG ํŠนํ™”"
37
+ self.language = "ko"
38
+ self.model_size = "3.6B"
39
+ self.multimodal = True
40
+
41
+ def _detect_local_environment(self) -> bool:
42
+ """๋กœ์ปฌ ํ™˜๊ฒฝ์ธ์ง€ ๊ฐ์ง€"""
43
+ import os
44
+
45
+ # ๋กœ์ปฌ ํ™˜๊ฒฝ ๊ฐ์ง€ ์กฐ๊ฑด๋“ค
46
+ local_indicators = [
47
+ os.path.exists('.env'),
48
+ os.path.exists('../.env'),
49
+ os.path.exists('../../.env'),
50
+ os.getenv('IS_LOCAL') == 'true',
51
+ os.getenv('ENVIRONMENT') == 'local',
52
+ os.getenv('DOCKER_ENV') == 'local',
53
+ # Windows ๊ฒฝ๋กœ ํ™•์ธ
54
+ os.path.exists('C:/Project/lily_generate_project/lily_generate_package/.env'),
55
+ ]
56
+
57
+ is_local = any(local_indicators)
58
+ logger.info(f"๐Ÿ” ํ™˜๊ฒฝ ๊ฐ์ง€: {'๋กœ์ปฌ' if is_local else '์„œ๋ฒ„'}")
59
+ return is_local
60
+
61
+ def _load_environment_variables(self):
62
+ """ํ™˜๊ฒฝ๋ณ€์ˆ˜๋ฅผ ๋กœ๋“œํ•ฉ๋‹ˆ๋‹ค."""
63
+ import os
64
+
65
+ try:
66
+ if self.is_local:
67
+ # ๋กœ์ปฌ ํ™˜๊ฒฝ: .env ํŒŒ์ผ ๋กœ๋“œ
68
+ from dotenv import load_dotenv
69
+
70
+ # ์—ฌ๋Ÿฌ ๊ฒฝ๋กœ์—์„œ .env ํŒŒ์ผ ์ฐพ๊ธฐ
71
+ env_paths = [
72
+ '.env',
73
+ '../.env',
74
+ '../../.env',
75
+ 'C:/Project/lily_generate_project/lily_generate_package/.env',
76
+ ]
77
+
78
+ env_loaded = False
79
+ for env_path in env_paths:
80
+ if os.path.exists(env_path):
81
+ load_dotenv(env_path)
82
+ logger.info(f"โœ… ํ™˜๊ฒฝ๋ณ€์ˆ˜ ๋กœ๋“œ๋จ: {env_path}")
83
+ env_loaded = True
84
+ break
85
+
86
+ if not env_loaded:
87
+ logger.warning("โš ๏ธ .env ํŒŒ์ผ์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค")
88
+ else:
89
+ # ์„œ๋ฒ„ ํ™˜๊ฒฝ: ์‹œ์Šคํ…œ ํ™˜๊ฒฝ๋ณ€์ˆ˜ ์‚ฌ์šฉ
90
+ logger.info("๐ŸŒ ์„œ๋ฒ„ ํ™˜๊ฒฝ๋ณ€์ˆ˜ ์‚ฌ์šฉ")
91
+
92
+ except ImportError:
93
+ logger.warning("โš ๏ธ python-dotenv๊ฐ€ ์„ค์น˜๋˜์ง€ ์•Š์Œ")
94
+ except Exception as e:
95
+ logger.error(f"โŒ ํ™˜๊ฒฝ๋ณ€์ˆ˜ ๋กœ๋“œ ์‹คํŒจ: {e}")
96
+
97
+ def load_model(self) -> Tuple[Any, Any]:
98
+ """ํ™˜๊ฒฝ์— ๋”ฐ๋ผ ๋ชจ๋ธ์„ ๋กœ๋“œํ•ฉ๋‹ˆ๋‹ค."""
99
+ logger.info(f"๐Ÿ“ฅ {self.display_name} ๋ชจ๋ธ ๋กœ๋“œ ์ค‘...")
100
+
101
+ import os
102
+ from pathlib import Path
103
+
104
+ # ํ™˜๊ฒฝ๋ณ€์ˆ˜ ๋กœ๋”ฉ
105
+ self._load_environment_variables()
106
+
107
+ try:
108
+ # 1. ๋กœ์ปฌ ์บ์‹œ ๊ฒฝ๋กœ๊ฐ€ ์žˆ๋Š”์ง€ ํ™•์ธ
109
+ use_local = False
110
+ if self.local_path is not None:
111
+ local_model_path = Path(self.local_path)
112
+ use_local = local_model_path.exists() and any(local_model_path.iterdir())
113
+
114
+ if use_local:
115
+ logger.info(f"๐Ÿ—‚๏ธ ๋กœ์ปฌ ๋ชจ๋ธ ์‚ฌ์šฉ: {self.local_path}")
116
+ model_path = self.local_path
117
+ local_files_only = True
118
+
119
+ # ๋กœ์ปฌ ๋ชจ๋ธ์˜ ๊ฒฝ์šฐ sys.path์— ์ถ”๊ฐ€
120
+ if self.local_path not in sys.path:
121
+ sys.path.insert(0, self.local_path)
122
+ else:
123
+ logger.info(f"๐ŸŒ Hugging Face Hub์—์„œ ๋‹ค์šด๋กœ๋“œ: {self.model_name}")
124
+ model_path = self.model_name
125
+ local_files_only = False
126
+
127
+ # ํ™˜๊ฒฝ๋ณ„ ์ถ”๊ฐ€ ์„ค์ •
128
+ if self.is_local:
129
+ logger.info("๐Ÿ  ๋กœ์ปฌ ํ™˜๊ฒฝ ์„ค์ • ์ ์šฉ")
130
+ # ๋กœ์ปฌ ํ™˜๊ฒฝ์—์„œ๋Š” ์ถ”๊ฐ€ ์„ค์ •์ด ํ•„์š”ํ•  ์ˆ˜ ์žˆ์Œ
131
+ else:
132
+ logger.info("๏ฟฝ๏ฟฝ๏ฟฝ๏ธ ์„œ๋ฒ„ ํ™˜๊ฒฝ ์„ค์ • ์ ์šฉ")
133
+ # ์„œ๋ฒ„ ํ™˜๊ฒฝ์—์„œ๋Š” ์บ์‹œ ๋””๋ ‰ํ† ๋ฆฌ ๋“ฑ ์„ค์ •
134
+
135
+ # DEBUG: ๋ชจ๋ธ ๊ฒฝ๋กœ์™€ ์„ค์ • ์ถœ๋ ฅ
136
+ logger.info(f"๐Ÿ” DEBUG: model_path = {model_path}")
137
+ logger.info(f"๐Ÿ” DEBUG: local_files_only = {local_files_only}")
138
+ logger.info(f"๐Ÿ” DEBUG: HF_TOKEN = {'์žˆ์Œ' if HF_TOKEN else '์—†์Œ'}")
139
+ logger.info(f"๐Ÿ” DEBUG: use_local = {use_local}")
140
+ logger.info(f"๐Ÿ” DEBUG: is_local = {self.is_local}")
141
+
142
+ # 2. ํ† ํฌ๋‚˜์ด์ € ๋กœ๋“œ
143
+ logger.info(f"๐Ÿ” DEBUG: ํ† ํฌ๋‚˜์ด์ € ๋กœ๋“œ ์‹œ์ž‘ - model_path={model_path}")
144
+ tokenizer = AutoTokenizer.from_pretrained(
145
+ model_path,
146
+ token=HF_TOKEN,
147
+ trust_remote_code=True,
148
+ local_files_only=local_files_only,
149
+ cache_dir="/app/cache/transformers" if not use_local else None
150
+ )
151
+ logger.info(f"โœ… ํ† ํฌ๋‚˜์ด์ € ๋กœ๋“œ ์™„๋ฃŒ ({tokenizer.__class__.__name__})")
152
+
153
+ # 3. ๋ชจ๋ธ ๋กœ๋“œ
154
+ logger.info(f"๐Ÿ” DEBUG: ๋ชจ๋ธ ๋กœ๋“œ ์‹œ์ž‘ - use_local={use_local}")
155
+ from modeling import KananaVForConditionalGeneration
156
+
157
+ if use_local:
158
+ # ๋กœ์ปฌ ๋ชจ๋ธ: ์ปค์Šคํ…€ ๋ชจ๋ธ๋ง ํด๋ž˜์Šค ์‚ฌ์šฉ
159
+ logger.info("๐Ÿ” DEBUG: ๋กœ์ปฌ ๋ชจ๋ธ ๋กœ๋“œ ์‹œ๋„")
160
+ selected_dtype = torch.float16 if DEVICE == "cuda" else torch.float16
161
+ logger.info(f"๐Ÿ” DEBUG: selected_dtype = {selected_dtype}")
162
+ model = KananaVForConditionalGeneration.from_pretrained(
163
+ model_path,
164
+ token=HF_TOKEN,
165
+ trust_remote_code=True,
166
+ torch_dtype=selected_dtype,
167
+ local_files_only=True,
168
+ # low_cpu_mem_usage=True,
169
+ ).to(DEVICE)
170
+ else:
171
+ # ์„œ๋ฒ„ ํ™˜๊ฒฝ: KananaVForConditionalGeneration ์ง์ ‘ ์‚ฌ์šฉ
172
+ logger.info("๐Ÿ” DEBUG: ์„œ๋ฒ„ ๋ชจ๋ธ ๋กœ๋“œ ์‹œ๋„")
173
+ logger.info("๐Ÿ” DEBUG: modeling ๋ชจ๋“ˆ import ์‹œ๋„")
174
+ try:
175
+ logger.info("๐Ÿ” DEBUG: modeling ๋ชจ๋“ˆ import ์„ฑ๊ณต")
176
+ except ImportError as e:
177
+ logger.error(f"๐Ÿ” DEBUG: modeling ๋ชจ๋“ˆ import ์‹คํŒจ - {e}")
178
+ raise
179
+
180
+ logger.info("๐Ÿ” DEBUG: KananaVForConditionalGeneration.from_pretrained ํ˜ธ์ถœ")
181
+ # CPU ํ™˜๊ฒฝ์—์„œ float16/bfloat16๋ณด๋‹ค float32๊ฐ€ ๋” ์•ˆ์ •์ ์ธ ๊ฒฝ์šฐ๊ฐ€ ๋งŽ์Œ
182
+ selected_dtype = torch.float16 if DEVICE == "cuda" else torch.float16
183
+ model = KananaVForConditionalGeneration.from_pretrained(
184
+ model_path,
185
+ token=HF_TOKEN,
186
+ torch_dtype=selected_dtype,
187
+ trust_remote_code=True,
188
+ cache_dir="/app/cache/transformers",
189
+ # device_map="auto",
190
+ # low_cpu_mem_usage=True,
191
+ ).to(DEVICE)
192
+
193
+ logger.info(f"โœ… ๋ชจ๋ธ ๋กœ๋“œ ์™„๋ฃŒ ({model.__class__.__name__})")
194
+ return model, tokenizer
195
+
196
+ except Exception as e:
197
+ logger.error(f"โŒ {self.display_name} ๋ชจ๋ธ ๋กœ๋“œ ์‹คํŒจ: {e}", exc_info=True)
198
+ if use_local and self.local_path in sys.path:
199
+ sys.path.remove(self.local_path)
200
+ raise
201
+
202
+ def get_generation_config(self) -> Dict[str, Any]:
203
+ # ๋ชจ๋ธ ํŒŒ๋ผ๋ฏธํ„ฐ ์ตœ์ ํ™” ์„ค์ •, max_new_tokens : ์ƒ์„ฑ๋˜๋Š” ํ…์ŠคํŠธ ๊ธธ์ด ์ตœ๋Œ€๊ฐ’ (์ด๋ฏธ์ง€ ์„ค๋ช…์„ ์œ„ํ•ด ์ฆ๊ฐ€)
204
+ return {"max_new_tokens": 256, "temperature": 0.7, "do_sample": True, "top_k": 40, "top_p": 0.9, "repetition_penalty": 1.1}
205
+
206
+ def extract_response(self, full_text: str, formatted_prompt: str = None, **kwargs) -> str:
207
+ """
208
+ ๋‹ค์–‘ํ•œ ์‘๋‹ต ํ˜•์‹์„ ์ฒ˜๋ฆฌํ•  ์ˆ˜ ์žˆ๋Š” ๋” ๋˜‘๋˜‘ํ•œ ์‘๋‹ต ์ถ”์ถœ ํ•จ์ˆ˜
209
+ """
210
+ logger.info(f"--- ์‘๋‹ต ์ถ”์ถœ ์‹œ์ž‘ ---")
211
+ logger.info(f"์ „์ฒด ์ƒ์„ฑ ํ…์ŠคํŠธ (Raw): \n---\n{full_text}\n---")
212
+
213
+ # ํ”„๋กฌํ”„ํŠธ๊ฐ€ ์ œ๊ณต๋œ ๊ฒฝ์šฐ ์ด๋ฅผ ์ œ๊ฑฐ
214
+ if formatted_prompt and formatted_prompt in full_text:
215
+ response = full_text.replace(formatted_prompt, "").strip()
216
+ logger.info(f"โœ… ์„ฑ๊ณต: ํ”„๋กฌํ”„ํŠธ ์ œ๊ฑฐ๋กœ ์‘๋‹ต ์ถ”์ถœ")
217
+ logger.info(f"์ถ”์ถœ๋œ ์‘๋‹ต: {response}")
218
+ if response: # ๋นˆ ๋ฌธ์ž์—ด์ด ์•„๋‹Œ ๊ฒฝ์šฐ์—๋งŒ ๋ฐ˜ํ™˜
219
+ return response
220
+
221
+ # 1์ˆœ์œ„: ๊ฐ€์žฅ ์ •ํ™•ํ•œ ํŠน์ˆ˜ ํƒœ๊ทธ๋กœ ์ถ”์ถœ ์‹œ๋„
222
+ # ์˜ˆ: <|start_header_id|>assistant<|end_header_id|>์•ˆ๋…•ํ•˜์„ธ์š”...
223
+ # ๋˜๋Š” <|im_start|>assistant์•ˆ๋…•ํ•˜์„ธ์š”...
224
+ assistant_tags = [
225
+ "<|start_header_id|>assistant<|end_header_id|>",
226
+ "<|im_start|>assistant",
227
+ "assistant\n",
228
+ "assistant:"
229
+ ]
230
+ for tag in assistant_tags:
231
+ if tag in full_text:
232
+ parts = full_text.split(tag)
233
+ if len(parts) > 1:
234
+ response = parts[-1].strip()
235
+ # ์ถ”๊ฐ€ ์ •๋ฆฌ: ํŠน์ˆ˜ ํ† ํฐ ์ œ๊ฑฐ
236
+ response = response.replace("<|im_end|>", "").strip()
237
+ logger.info(f"โœ… ์„ฑ๊ณต: '{tag}' ํƒœ๊ทธ๋กœ ์‘๋‹ต ์ถ”์ถœ")
238
+ logger.info(f"์ถ”์ถœ๋œ ์‘๋‹ต: {response}")
239
+ if response: # ๋นˆ ๋ฌธ์ž์—ด์ด ์•„๋‹Œ ๊ฒฝ์šฐ์—๋งŒ ๋ฐ˜ํ™˜
240
+ return response
241
+
242
+ # 2์ˆœ์œ„: ๊ฐ„๋‹จํ•œ ํ‚ค์›Œ๋“œ๋กœ ์ถ”์ถœ ์‹œ๋„
243
+ # ์˜ˆ: ... user ์•ˆ๋…•ํ•˜์„ธ์š” assistant ์•ˆ๋…•ํ•˜์„ธ์š” ...
244
+ if "assistant" in full_text:
245
+ parts = full_text.split("assistant")
246
+ if len(parts) > 1:
247
+ response = parts[-1].strip()
248
+ response = response.replace("<|im_end|>", "").strip()
249
+ logger.info("โœ… ์„ฑ๊ณต: 'assistant' ํ‚ค์›Œ๋“œ๋กœ ์‘๋‹ต ์ถ”์ถœ")
250
+ logger.info(f"์ถ”์ถœ๋œ ์‘๋‹ต: {response}")
251
+ if response: # ๋นˆ ๋ฌธ์ž์—ด์ด ์•„๋‹Œ ๊ฒฝ์šฐ์—๋งŒ ๋ฐ˜ํ™˜
252
+ return response
253
+
254
+ # 3์ˆœ์œ„: ํ”„๋กฌํ”„ํŠธ๊ฐ€ ์—†๋Š” ๊ฒฝ์šฐ, ์ „์ฒด ํ…์ŠคํŠธ์—์„œ ๋ถˆํ•„์š”ํ•œ ๋ถ€๋ถ„ ์ œ๊ฑฐ
255
+ clean_text = full_text.strip()
256
+ # ์ผ๋ฐ˜์ ์ธ ํ”„๋กฌํ”„ํŠธ ํŒจํ„ด ์ œ๊ฑฐ ์‹œ๋„
257
+ patterns_to_remove = [
258
+ "<|im_start|>user\n",
259
+ "<|im_end|>",
260
+ "<image>",
261
+ "user\n",
262
+ "assistant\n"
263
+ ]
264
+
265
+ for pattern in patterns_to_remove:
266
+ clean_text = clean_text.replace(pattern, "")
267
+
268
+ clean_text = clean_text.strip()
269
+
270
+ if clean_text and clean_text != full_text:
271
+ logger.info("โœ… ์„ฑ๊ณต: ํŒจํ„ด ์ œ๊ฑฐ๋กœ ์‘๋‹ต ์ •๋ฆฌ")
272
+ logger.info(f"์ •๋ฆฌ๋œ ์‘๋‹ต: {clean_text}")
273
+ return clean_text
274
+
275
+ logger.warning("โš ๏ธ ๊ฒฝ๊ณ : ์‘๋‹ต์—์„œ assistant ๋ถ€๋ถ„์„ ์ฐพ์ง€ ๋ชปํ–ˆ์Šต๋‹ˆ๋‹ค. ์ „์ฒด ํ…์ŠคํŠธ๋ฅผ ๋ฐ˜ํ™˜ํ•ฉ๋‹ˆ๋‹ค.")
276
+ logger.info(f"์ตœ์ข… ๋ฐ˜ํ™˜ ํ…์ŠคํŠธ: {full_text}")
277
+ return full_text
278
+
279
+ def get_model_info(self) -> Dict[str, Any]:
280
+ return {"model_name": self.model_name, "display_name": self.display_name, "description": self.description, "language": self.language, "model_size": self.model_size, "local_path": self.local_path, "multimodal": self.multimodal}
lily_llm_api/models/kanana_1_5_v_3b_instruct_250819_1134py ADDED
@@ -0,0 +1,236 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Kanana-1.5-v-3b-instruct ๋ชจ๋ธ ํ”„๋กœํ•„ (๋‹จ์ˆœ ๋กœ๋”ฉ ์ตœ์ข…๋ณธ)
4
+ """
5
+ import sys
6
+ from typing import Dict, Any, Tuple
7
+ import torch
8
+ import logging
9
+ from transformers import AutoTokenizer
10
+ import os
11
+ from dotenv import load_dotenv
12
+ from pathlib import Path
13
+
14
+ load_dotenv()
15
+
16
+ HF_TOKEN = os.getenv("HF_TOKEN")
17
+
18
+ logger = logging.getLogger(__name__)
19
+ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
20
+
21
+ class Kanana15V3bInstructProfile:
22
+ """Kanana-1.5-v-3b-instruct ๋ชจ๋ธ ํ”„๋กœํ•„"""
23
+
24
+ def __init__(self):
25
+ # ํ™˜๊ฒฝ ๊ฐ์ง€
26
+ self.is_local = self._detect_local_environment()
27
+
28
+ # ๋ชจ๋ธ ๊ฒฝ๋กœ ์„ค์ •
29
+ if self.is_local:
30
+ self.model_name = "gbrabbit/lily-math-model" # ๋กœ์ปฌ์—์„œ๋„ HF ๋ชจ๋ธ๋ช… ์‚ฌ์šฉ
31
+ self.local_path = "./lily_llm_core/models/kanana_1_5_v_3b_instruct"
32
+ self.display_name = "kanana-1.5-v-3b-instruct"
33
+ else:
34
+ self.model_name = "gbrabbit/lily-math-model" # Hugging Face Hub ๋ชจ๋ธ ๊ฒฝ๋กœ
35
+ self.local_path = None # ์„œ๋ฒ„์—์„œ๋Š” ๋กœ์ปฌ ๊ฒฝ๋กœ ์‚ฌ์šฉ ์•ˆํ•จ
36
+ self.display_name = "kanana-1.5-v-3b-instruct"
37
+
38
+ self.description = "์นด์นด์˜ค ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ๋ชจ๋ธ (3.6B) - Math RAG ํŠนํ™”"
39
+ self.language = "ko"
40
+ self.model_size = "3.6B"
41
+ self.multimodal = True
42
+
43
+ def _detect_local_environment(self) -> bool:
44
+ """๋กœ์ปฌ ํ™˜๊ฒฝ์ธ์ง€ ๊ฐ์ง€"""
45
+ import os
46
+
47
+ # ๋กœ์ปฌ ํ™˜๊ฒฝ ๊ฐ์ง€ ์กฐ๊ฑด๋“ค
48
+ local_indicators = [
49
+ os.path.exists('.env'),
50
+ os.path.exists('../.env'),
51
+ os.path.exists('../../.env'),
52
+ os.getenv('IS_LOCAL') == 'true',
53
+ os.getenv('ENVIRONMENT') == 'local',
54
+ os.getenv('DOCKER_ENV') == 'local',
55
+ # Windows ๊ฒฝ๋กœ ํ™•์ธ
56
+ os.path.exists('C:/Project/lily_generate_project/lily_generate_package/.env'),
57
+ ]
58
+
59
+ is_local = any(local_indicators)
60
+ logger.info(f"๐Ÿ” ํ™˜๊ฒฝ ๊ฐ์ง€: {'๋กœ์ปฌ' if is_local else '์„œ๋ฒ„'}")
61
+ return is_local
62
+
63
+ def _load_environment_variables(self):
64
+ """ํ™˜๊ฒฝ๋ณ€์ˆ˜๋ฅผ ๋กœ๋“œํ•ฉ๋‹ˆ๋‹ค."""
65
+ import os
66
+
67
+ try:
68
+ if self.is_local:
69
+ # ๋กœ์ปฌ ํ™˜๊ฒฝ: .env ํŒŒ์ผ ๋กœ๋“œ
70
+ from dotenv import load_dotenv
71
+
72
+ # ์—ฌ๋Ÿฌ ๊ฒฝ๋กœ์—์„œ .env ํŒŒ์ผ ์ฐพ๊ธฐ
73
+ env_paths = [
74
+ '.env',
75
+ '../.env',
76
+ '../../.env',
77
+ 'C:/Project/lily_generate_project/lily_generate_package/.env',
78
+ ]
79
+
80
+ env_loaded = False
81
+ for env_path in env_paths:
82
+ if os.path.exists(env_path):
83
+ load_dotenv(env_path)
84
+ logger.info(f"โœ… ํ™˜๊ฒฝ๋ณ€์ˆ˜ ๋กœ๋“œ๋จ: {env_path}")
85
+ env_loaded = True
86
+ break
87
+
88
+ if not env_loaded:
89
+ logger.warning("โš ๏ธ .env ํŒŒ์ผ์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค")
90
+ else:
91
+ # ์„œ๋ฒ„ ํ™˜๊ฒฝ: ์‹œ์Šคํ…œ ํ™˜๊ฒฝ๋ณ€์ˆ˜ ์‚ฌ์šฉ
92
+ logger.info("๐ŸŒ ์„œ๋ฒ„ ํ™˜๊ฒฝ๋ณ€์ˆ˜ ์‚ฌ์šฉ")
93
+
94
+ except ImportError:
95
+ logger.warning("โš ๏ธ python-dotenv๊ฐ€ ์„ค์น˜๋˜์ง€ ์•Š์Œ")
96
+ except Exception as e:
97
+ logger.error(f"โŒ ํ™˜๊ฒฝ๋ณ€์ˆ˜ ๋กœ๋“œ ์‹คํŒจ: {e}")
98
+
99
+ def load_model(self) -> Tuple[Any, Any]:
100
+ """๋ชจ๋ธ ๋กœ๋“œ (๊ณต์‹์ ์ธ ๋ฐฉ๋ฒ• + ์ ˆ๋Œ€ ๊ฒฝ๋กœ sys.path ์ˆ˜์ • ์ตœ์ข…๋ณธ)"""
101
+ logger.info(f"๐Ÿ“ฅ {self.display_name} ๋ชจ๋ธ ๋กœ๋“œ ์ค‘ (๊ณต์‹ ๋ฐฉ๋ฒ•)...")
102
+
103
+ import sys
104
+ from pathlib import Path
105
+ import os # <<< os ๋ชจ๋“ˆ ์ถ”๊ฐ€
106
+
107
+ # self.local_path๋ฅผ ์ ˆ๋Œ€ ๊ฒฝ๋กœ๋กœ ๋ณ€ํ™˜
108
+ absolute_model_path = os.path.abspath(self.local_path)
109
+
110
+ use_local = Path(absolute_model_path).exists() and any(Path(absolute_model_path).iterdir())
111
+ # model_path ๋ณ€์ˆ˜์— ์ ˆ๋Œ€ ๊ฒฝ๋กœ๋ฅผ ์‚ฌ์šฉ
112
+ model_path = absolute_model_path if use_local else self.model_name
113
+
114
+ try:
115
+ from transformers import AutoModelForVision2Seq, AutoProcessor
116
+
117
+ logger.info(f"๐Ÿ” ๋ชจ๋ธ ๊ฒฝ๋กœ: {model_path} (local={'yes' if use_local else 'no'})")
118
+
119
+ if use_local and model_path not in sys.path:
120
+ logger.info(f" -> sys.path์— ์ž„์‹œ ๊ฒฝ๋กœ ์ถ”๊ฐ€: {model_path}")
121
+ sys.path.insert(0, model_path)
122
+
123
+ processor = AutoProcessor.from_pretrained(
124
+ model_path,
125
+ trust_remote_code=True,
126
+ local_files_only=use_local
127
+ )
128
+
129
+ device = 'cuda' if torch.cuda.is_available() else 'cpu'
130
+ selected_dtype = torch.bfloat16
131
+ # CPU bfloat16 ์ง€์› ์—ฌ๋ถ€ ์ฒดํฌ ๋กœ์ง ์ˆ˜์ •
132
+ if device == 'cpu' and not (hasattr(torch.backends, 'mkl') and torch.backends.mkl.is_available() and hasattr(torch, 'bfloat16')):
133
+ selected_dtype = torch.float32
134
+ logger.warning("CPU์—์„œ bfloat16์„ ์ง€์›ํ•˜์ง€ ์•Š๊ฑฐ๋‚˜ MKL์ด ์—†์–ด float32๋กœ ๋ณ€๊ฒฝํ•ฉ๋‹ˆ๋‹ค.")
135
+
136
+ model = AutoModelForVision2Seq.from_pretrained(
137
+ model_path,
138
+ trust_remote_code=True,
139
+ torch_dtype=selected_dtype,
140
+ local_files_only=use_local,
141
+ ).to(device)
142
+
143
+ model.eval()
144
+
145
+ logger.info(f"โœ… {self.display_name} ๋ชจ๋ธ ๋กœ๋“œ ์„ฑ๊ณต! (device={device}, dtype={selected_dtype})")
146
+ return model, processor
147
+
148
+ except Exception as e:
149
+ logger.error(f"โŒ {self.display_name} ๋ชจ๋ธ ๋กœ๋“œ ์‹คํŒจ: {e}")
150
+ raise
151
+
152
+ finally:
153
+ if use_local and model_path in sys.path:
154
+ logger.info(f" -> sys.path์—์„œ ์ž„์‹œ ๊ฒฝ๋กœ ์ œ๊ฑฐ: {model_path}")
155
+ sys.path.remove(model_path)
156
+
157
+
158
+ def get_generation_config(self) -> Dict[str, Any]:
159
+ # ๋ชจ๋ธ ํŒŒ๋ผ๋ฏธํ„ฐ ์ตœ์ ํ™” ์„ค์ •, max_new_tokens : ์ƒ์„ฑ๋˜๋Š” ํ…์ŠคํŠธ ๊ธธ์ด ์ตœ๋Œ€๊ฐ’ (์ด๋ฏธ์ง€ ์„ค๋ช…์„ ์œ„ํ•ด ์ฆ๊ฐ€)
160
+ return {"max_new_tokens": 256, "temperature": 0.7, "do_sample": True, "top_k": 40, "top_p": 0.9, "repetition_penalty": 1.1}
161
+
162
+ def extract_response(self, full_text: str, formatted_prompt: str = None, **kwargs) -> str:
163
+ """
164
+ ๋‹ค์–‘ํ•œ ์‘๋‹ต ํ˜•์‹์„ ์ฒ˜๋ฆฌํ•  ์ˆ˜ ์žˆ๋Š” ๋” ๋˜‘๋˜‘ํ•œ ์‘๋‹ต ์ถ”์ถœ ํ•จ์ˆ˜
165
+ """
166
+ logger.info(f"--- ์‘๋‹ต ์ถ”์ถœ ์‹œ์ž‘ ---")
167
+ logger.info(f"์ „์ฒด ์ƒ์„ฑ ํ…์ŠคํŠธ (Raw): \n---\n{full_text}\n---")
168
+
169
+ # ํ”„๋กฌํ”„ํŠธ๊ฐ€ ์ œ๊ณต๋œ ๊ฒฝ์šฐ ์ด๋ฅผ ์ œ๊ฑฐ
170
+ if formatted_prompt and formatted_prompt in full_text:
171
+ response = full_text.replace(formatted_prompt, "").strip()
172
+ logger.info(f"โœ… ์„ฑ๊ณต: ํ”„๋กฌํ”„ํŠธ ์ œ๊ฑฐ๋กœ ์‘๋‹ต ์ถ”์ถœ")
173
+ logger.info(f"์ถ”์ถœ๋œ ์‘๋‹ต: {response}")
174
+ if response: # ๋นˆ ๋ฌธ์ž์—ด์ด ์•„๋‹Œ ๊ฒฝ์šฐ์—๋งŒ ๋ฐ˜ํ™˜
175
+ return response
176
+
177
+ # 1์ˆœ์œ„: ๊ฐ€์žฅ ์ •ํ™•ํ•œ ํŠน์ˆ˜ ํƒœ๊ทธ๋กœ ์ถ”์ถœ ์‹œ๋„
178
+ # ์˜ˆ: <|start_header_id|>assistant<|end_header_id|>์•ˆ๋…•ํ•˜์„ธ์š”...
179
+ # ๋˜๋Š” <|im_start|>assistant์•ˆ๋…•ํ•˜์„ธ์š”...
180
+ assistant_tags = [
181
+ "<|start_header_id|>assistant<|end_header_id|>",
182
+ "<|im_start|>assistant",
183
+ "assistant\n",
184
+ "assistant:"
185
+ ]
186
+ for tag in assistant_tags:
187
+ if tag in full_text:
188
+ parts = full_text.split(tag)
189
+ if len(parts) > 1:
190
+ response = parts[-1].strip()
191
+ # ์ถ”๊ฐ€ ์ •๋ฆฌ: ํŠน์ˆ˜ ํ† ํฐ ์ œ๊ฑฐ
192
+ response = response.replace("<|im_end|>", "").strip()
193
+ logger.info(f"โœ… ์„ฑ๊ณต: '{tag}' ํƒœ๊ทธ๋กœ ์‘๋‹ต ์ถ”์ถœ")
194
+ logger.info(f"์ถ”์ถœ๋œ ์‘๋‹ต: {response}")
195
+ if response: # ๋นˆ ๋ฌธ์ž์—ด์ด ์•„๋‹Œ ๊ฒฝ์šฐ์—๋งŒ ๋ฐ˜ํ™˜
196
+ return response
197
+
198
+ # 2์ˆœ์œ„: ๊ฐ„๋‹จํ•œ ํ‚ค์›Œ๋“œ๋กœ ์ถ”์ถœ ์‹œ๋„
199
+ # ์˜ˆ: ... user ์•ˆ๋…•ํ•˜์„ธ์š” assistant ์•ˆ๋…•ํ•˜์„ธ์š” ...
200
+ if "assistant" in full_text:
201
+ parts = full_text.split("assistant")
202
+ if len(parts) > 1:
203
+ response = parts[-1].strip()
204
+ response = response.replace("<|im_end|>", "").strip()
205
+ logger.info("โœ… ์„ฑ๊ณต: 'assistant' ํ‚ค์›Œ๋“œ๋กœ ์‘๋‹ต ์ถ”์ถœ")
206
+ logger.info(f"์ถ”์ถœ๋œ ์‘๋‹ต: {response}")
207
+ if response: # ๋นˆ ๋ฌธ์ž์—ด์ด ์•„๋‹Œ ๊ฒฝ์šฐ์—๋งŒ ๋ฐ˜ํ™˜
208
+ return response
209
+
210
+ # 3์ˆœ์œ„: ํ”„๋กฌํ”„ํŠธ๊ฐ€ ์—†๋Š” ๊ฒฝ์šฐ, ์ „์ฒด ํ…์ŠคํŠธ์—์„œ ๋ถˆํ•„์š”ํ•œ ๋ถ€๋ถ„ ์ œ๊ฑฐ
211
+ clean_text = full_text.strip()
212
+ # ์ผ๋ฐ˜์ ์ธ ํ”„๋กฌํ”„ํŠธ ํŒจํ„ด ์ œ๊ฑฐ ์‹œ๋„
213
+ patterns_to_remove = [
214
+ "<|im_start|>user\n",
215
+ "<|im_end|>",
216
+ "<image>",
217
+ "user\n",
218
+ "assistant\n"
219
+ ]
220
+
221
+ for pattern in patterns_to_remove:
222
+ clean_text = clean_text.replace(pattern, "")
223
+
224
+ clean_text = clean_text.strip()
225
+
226
+ if clean_text and clean_text != full_text:
227
+ logger.info("โœ… ์„ฑ๊ณต: ํŒจํ„ด ์ œ๊ฑฐ๋กœ ์‘๋‹ต ์ •๋ฆฌ")
228
+ logger.info(f"์ •๋ฆฌ๋œ ์‘๋‹ต: {clean_text}")
229
+ return clean_text
230
+
231
+ logger.warning("โš ๏ธ ๊ฒฝ๊ณ : ์‘๋‹ต์—์„œ assistant ๋ถ€๋ถ„์„ ์ฐพ์ง€ ๋ชปํ–ˆ์Šต๋‹ˆ๋‹ค. ์ „์ฒด ํ…์ŠคํŠธ๋ฅผ ๋ฐ˜ํ™˜ํ•ฉ๋‹ˆ๋‹ค.")
232
+ logger.info(f"์ตœ์ข… ๋ฐ˜ํ™˜ ํ…์ŠคํŠธ: {full_text}")
233
+ return full_text
234
+
235
+ def get_model_info(self) -> Dict[str, Any]:
236
+ return {"model_name": self.model_name, "display_name": self.display_name, "description": self.description, "language": self.language, "model_size": self.model_size, "local_path": self.local_path, "multimodal": self.multimodal}
lily_llm_api/models/kanana_1_5_v_3b_instruct_250819_1304.py ADDED
@@ -0,0 +1,235 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Kanana-1.5-v-3b-instruct ๋ชจ๋ธ ํ”„๋กœํ•„ (๋‹จ์ˆœ ๋กœ๋”ฉ ์ตœ์ข…๋ณธ)
4
+ """
5
+ import sys
6
+ from typing import Dict, Any, Tuple
7
+ import torch
8
+ import logging
9
+ from transformers import AutoTokenizer
10
+ import os
11
+ from dotenv import load_dotenv
12
+ from pathlib import Path
13
+
14
+ load_dotenv()
15
+
16
+ HF_TOKEN = os.getenv("HF_TOKEN")
17
+
18
+ logger = logging.getLogger(__name__)
19
+ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
20
+
21
+ class Kanana15V3bInstructProfile:
22
+ """Kanana-1.5-v-3b-instruct ๋ชจ๋ธ ํ”„๋กœํ•„"""
23
+
24
+ def __init__(self):
25
+ # ํ™˜๊ฒฝ ๊ฐ์ง€
26
+ self.is_local = self._detect_local_environment()
27
+
28
+ # ๋ชจ๋ธ ๊ฒฝ๋กœ ์„ค์ •
29
+ if self.is_local:
30
+ self.model_name = "gbrabbit/lily-math-model" # ๋กœ์ปฌ์—์„œ๋„ HF ๋ชจ๋ธ๋ช… ์‚ฌ์šฉ
31
+ self.local_path = "./lily_llm_core/models/kanana_1_5_v_3b_instruct"
32
+ self.display_name = "kanana-1.5-v-3b-instruct"
33
+ else:
34
+ self.model_name = "gbrabbit/lily-math-model" # Hugging Face Hub ๋ชจ๋ธ ๊ฒฝ๋กœ
35
+ self.local_path = None # ์„œ๋ฒ„์—์„œ๋Š” ๋กœ์ปฌ ๊ฒฝ๋กœ ์‚ฌ์šฉ ์•ˆํ•จ
36
+ self.display_name = "kanana-1.5-v-3b-instruct"
37
+
38
+ self.description = "์นด์นด์˜ค ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ๋ชจ๋ธ (3.6B) - Math RAG ํŠนํ™”"
39
+ self.language = "ko"
40
+ self.model_size = "3.6B"
41
+ self.multimodal = True
42
+
43
+ def _detect_local_environment(self) -> bool:
44
+ """๋กœ์ปฌ ํ™˜๊ฒฝ์ธ์ง€ ๊ฐ์ง€"""
45
+ import os
46
+
47
+ # ๋กœ์ปฌ ํ™˜๊ฒฝ ๊ฐ์ง€ ์กฐ๊ฑด๋“ค
48
+ local_indicators = [
49
+ os.path.exists('.env'),
50
+ os.path.exists('../.env'),
51
+ os.path.exists('../../.env'),
52
+ os.getenv('IS_LOCAL') == 'true',
53
+ os.getenv('ENVIRONMENT') == 'local',
54
+ os.getenv('DOCKER_ENV') == 'local',
55
+ # Windows ๊ฒฝ๋กœ ํ™•์ธ
56
+ os.path.exists('C:/Project/lily_generate_project/lily_generate_package/.env'),
57
+ ]
58
+
59
+ is_local = any(local_indicators)
60
+ logger.info(f"๐Ÿ” ํ™˜๊ฒฝ ๊ฐ์ง€: {'๋กœ์ปฌ' if is_local else '์„œ๋ฒ„'}")
61
+ return is_local
62
+
63
+ def _load_environment_variables(self):
64
+ """ํ™˜๊ฒฝ๋ณ€์ˆ˜๋ฅผ ๋กœ๋“œํ•ฉ๋‹ˆ๋‹ค."""
65
+ import os
66
+
67
+ try:
68
+ if self.is_local:
69
+ # ๋กœ์ปฌ ํ™˜๊ฒฝ: .env ํŒŒ์ผ ๋กœ๋“œ
70
+ from dotenv import load_dotenv
71
+
72
+ # ์—ฌ๋Ÿฌ ๊ฒฝ๋กœ์—์„œ .env ํŒŒ์ผ ์ฐพ๊ธฐ
73
+ env_paths = [
74
+ '.env',
75
+ '../.env',
76
+ '../../.env',
77
+ 'C:/Project/lily_generate_project/lily_generate_package/.env',
78
+ ]
79
+
80
+ env_loaded = False
81
+ for env_path in env_paths:
82
+ if os.path.exists(env_path):
83
+ load_dotenv(env_path)
84
+ logger.info(f"โœ… ํ™˜๊ฒฝ๋ณ€์ˆ˜ ๋กœ๋“œ๋จ: {env_path}")
85
+ env_loaded = True
86
+ break
87
+
88
+ if not env_loaded:
89
+ logger.warning("โš ๏ธ .env ํŒŒ์ผ์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค")
90
+ else:
91
+ # ์„œ๋ฒ„ ํ™˜๊ฒฝ: ์‹œ์Šคํ…œ ํ™˜๊ฒฝ๋ณ€์ˆ˜ ์‚ฌ์šฉ
92
+ logger.info("๐ŸŒ ์„œ๋ฒ„ ํ™˜๊ฒฝ๋ณ€์ˆ˜ ์‚ฌ์šฉ")
93
+
94
+ except ImportError:
95
+ logger.warning("โš ๏ธ python-dotenv๊ฐ€ ์„ค์น˜๋˜์ง€ ์•Š์Œ")
96
+ except Exception as e:
97
+ logger.error(f"โŒ ํ™˜๊ฒฝ๋ณ€์ˆ˜ ๋กœ๋“œ ์‹คํŒจ: {e}")
98
+
99
+ def load_model(self) -> Tuple[Any, Any]:
100
+ """๋ชจ๋ธ ๋กœ๋“œ (๊ณต์‹์ ์ธ ๋ฐฉ๋ฒ• + ์ ˆ๋Œ€ ๊ฒฝ๋กœ sys.path ์ˆ˜์ • ์ตœ์ข…๋ณธ)"""
101
+ logger.info(f"๐Ÿ“ฅ {self.display_name} ๋ชจ๋ธ ๋กœ๋“œ ์ค‘ (๊ณต์‹ ๋ฐฉ๋ฒ•)...")
102
+
103
+ import sys
104
+ from pathlib import Path
105
+ import os # <<< os ๋ชจ๋“ˆ ์ถ”๊ฐ€
106
+
107
+ # self.local_path๋ฅผ ์ ˆ๋Œ€ ๊ฒฝ๋กœ๋กœ ๋ณ€ํ™˜
108
+ absolute_model_path = os.path.abspath(self.local_path)
109
+
110
+ use_local = Path(absolute_model_path).exists() and any(Path(absolute_model_path).iterdir())
111
+ # model_path ๋ณ€์ˆ˜์— ์ ˆ๋Œ€ ๊ฒฝ๋กœ๋ฅผ ์‚ฌ์šฉ
112
+ model_path = absolute_model_path if use_local else self.model_name
113
+
114
+ try:
115
+ from transformers import AutoModelForVision2Seq, AutoProcessor
116
+
117
+ logger.info(f"๐Ÿ” ๋ชจ๋ธ ๊ฒฝ๋กœ: {model_path} (local={'yes' if use_local else 'no'})")
118
+
119
+ if use_local and model_path not in sys.path:
120
+ logger.info(f" -> sys.path์— ์ž„์‹œ ๊ฒฝ๋กœ ์ถ”๊ฐ€: {model_path}")
121
+ sys.path.insert(0, model_path)
122
+
123
+ processor = AutoProcessor.from_pretrained(
124
+ model_path,
125
+ trust_remote_code=True,
126
+ local_files_only=use_local
127
+ )
128
+
129
+ device = 'cuda' if torch.cuda.is_available() else 'cpu'
130
+ selected_dtype = torch.bfloat16
131
+ # CPU bfloat16 ์ง€์› ์—ฌ๋ถ€ ์ฒดํฌ ๋กœ์ง ์ˆ˜์ •
132
+ if device == 'cpu' and not (hasattr(torch.backends, 'mkl') and torch.backends.mkl.is_available() and hasattr(torch, 'bfloat16')):
133
+ selected_dtype = torch.bfloat16
134
+
135
+ model = AutoModelForVision2Seq.from_pretrained(
136
+ model_path,
137
+ trust_remote_code=True,
138
+ torch_dtype=selected_dtype,
139
+ local_files_only=use_local,
140
+ ).to(device)
141
+
142
+ model.eval()
143
+
144
+ logger.info(f"โœ… {self.display_name} ๋ชจ๋ธ ๋กœ๋“œ ์„ฑ๊ณต! (device={device}, dtype={selected_dtype})")
145
+ return model, processor
146
+
147
+ except Exception as e:
148
+ logger.error(f"โŒ {self.display_name} ๋ชจ๋ธ ๋กœ๋“œ ์‹คํŒจ: {e}")
149
+ raise
150
+
151
+ finally:
152
+ if use_local and model_path in sys.path:
153
+ logger.info(f" -> sys.path์—์„œ ์ž„์‹œ ๊ฒฝ๋กœ ์ œ๊ฑฐ: {model_path}")
154
+ sys.path.remove(model_path)
155
+
156
+
157
+ def get_generation_config(self) -> Dict[str, Any]:
158
+ # ๋ชจ๋ธ ํŒŒ๋ผ๋ฏธํ„ฐ ์ตœ์ ํ™” ์„ค์ •, max_new_tokens : ์ƒ์„ฑ๋˜๋Š” ํ…์ŠคํŠธ ๊ธธ์ด ์ตœ๋Œ€๊ฐ’ (์ด๋ฏธ์ง€ ์„ค๋ช…์„ ์œ„ํ•ด ์ฆ๊ฐ€)
159
+ return {"max_new_tokens": 20, "temperature": 0.7, "do_sample": True, "top_k": 40, "top_p": 0.9, "repetition_penalty": 1.1}
160
+
161
+ def extract_response(self, full_text: str, formatted_prompt: str = None, **kwargs) -> str:
162
+ """
163
+ ๋‹ค์–‘ํ•œ ์‘๋‹ต ํ˜•์‹์„ ์ฒ˜๋ฆฌํ•  ์ˆ˜ ์žˆ๋Š” ๋” ๋˜‘๋˜‘ํ•œ ์‘๋‹ต ์ถ”์ถœ ํ•จ์ˆ˜
164
+ """
165
+ logger.info(f"--- ์‘๋‹ต ์ถ”์ถœ ์‹œ์ž‘ ---")
166
+ logger.info(f"์ „์ฒด ์ƒ์„ฑ ํ…์ŠคํŠธ (Raw): \n---\n{full_text}\n---")
167
+
168
+ # ํ”„๋กฌํ”„ํŠธ๊ฐ€ ์ œ๊ณต๋œ ๊ฒฝ์šฐ ์ด๋ฅผ ์ œ๊ฑฐ
169
+ if formatted_prompt and formatted_prompt in full_text:
170
+ response = full_text.replace(formatted_prompt, "").strip()
171
+ logger.info(f"โœ… ์„ฑ๊ณต: ํ”„๋กฌํ”„ํŠธ ์ œ๊ฑฐ๋กœ ์‘๋‹ต ์ถ”์ถœ")
172
+ logger.info(f"์ถ”์ถœ๋œ ์‘๋‹ต: {response}")
173
+ if response: # ๋นˆ ๋ฌธ์ž์—ด์ด ์•„๋‹Œ ๊ฒฝ์šฐ์—๋งŒ ๋ฐ˜ํ™˜
174
+ return response
175
+
176
+ # 1์ˆœ์œ„: ๊ฐ€์žฅ ์ •ํ™•ํ•œ ํŠน์ˆ˜ ํƒœ๊ทธ๋กœ ์ถ”์ถœ ์‹œ๋„
177
+ # ์˜ˆ: <|start_header_id|>assistant<|end_header_id|>์•ˆ๋…•ํ•˜์„ธ์š”...
178
+ # ๋˜๋Š” <|im_start|>assistant์•ˆ๋…•ํ•˜์„ธ์š”...
179
+ assistant_tags = [
180
+ "<|start_header_id|>assistant<|end_header_id|>",
181
+ "<|im_start|>assistant",
182
+ "assistant\n",
183
+ "assistant:"
184
+ ]
185
+ for tag in assistant_tags:
186
+ if tag in full_text:
187
+ parts = full_text.split(tag)
188
+ if len(parts) > 1:
189
+ response = parts[-1].strip()
190
+ # ์ถ”๊ฐ€ ์ •๋ฆฌ: ํŠน์ˆ˜ ํ† ํฐ ์ œ๊ฑฐ
191
+ response = response.replace("<|im_end|>", "").strip()
192
+ logger.info(f"โœ… ์„ฑ๊ณต: '{tag}' ํƒœ๊ทธ๋กœ ์‘๋‹ต ์ถ”์ถœ")
193
+ logger.info(f"์ถ”์ถœ๋œ ์‘๋‹ต: {response}")
194
+ if response: # ๋นˆ ๋ฌธ์ž์—ด์ด ์•„๋‹Œ ๊ฒฝ์šฐ์—๋งŒ ๋ฐ˜ํ™˜
195
+ return response
196
+
197
+ # 2์ˆœ์œ„: ๊ฐ„๋‹จํ•œ ํ‚ค์›Œ๋“œ๋กœ ์ถ”์ถœ ์‹œ๋„
198
+ # ์˜ˆ: ... user ์•ˆ๋…•ํ•˜์„ธ์š” assistant ์•ˆ๋…•ํ•˜์„ธ์š” ...
199
+ if "assistant" in full_text:
200
+ parts = full_text.split("assistant")
201
+ if len(parts) > 1:
202
+ response = parts[-1].strip()
203
+ response = response.replace("<|im_end|>", "").strip()
204
+ logger.info("โœ… ์„ฑ๊ณต: 'assistant' ํ‚ค์›Œ๋“œ๋กœ ์‘๋‹ต ์ถ”์ถœ")
205
+ logger.info(f"์ถ”์ถœ๋œ ์‘๋‹ต: {response}")
206
+ if response: # ๋นˆ ๋ฌธ์ž์—ด์ด ์•„๋‹Œ ๊ฒฝ์šฐ์—๋งŒ ๋ฐ˜ํ™˜
207
+ return response
208
+
209
+ # 3์ˆœ์œ„: ํ”„๋กฌํ”„ํŠธ๊ฐ€ ์—†๋Š” ๊ฒฝ์šฐ, ์ „์ฒด ํ…์ŠคํŠธ์—์„œ ๋ถˆํ•„์š”ํ•œ ๋ถ€๋ถ„ ์ œ๊ฑฐ
210
+ clean_text = full_text.strip()
211
+ # ์ผ๋ฐ˜์ ์ธ ํ”„๋กฌํ”„ํŠธ ํŒจํ„ด ์ œ๊ฑฐ ์‹œ๋„
212
+ patterns_to_remove = [
213
+ "<|im_start|>user\n",
214
+ "<|im_end|>",
215
+ "<image>",
216
+ "user\n",
217
+ "assistant\n"
218
+ ]
219
+
220
+ for pattern in patterns_to_remove:
221
+ clean_text = clean_text.replace(pattern, "")
222
+
223
+ clean_text = clean_text.strip()
224
+
225
+ if clean_text and clean_text != full_text:
226
+ logger.info("โœ… ์„ฑ๊ณต: ํŒจํ„ด ์ œ๊ฑฐ๋กœ ์‘๋‹ต ์ •๋ฆฌ")
227
+ logger.info(f"์ •๋ฆฌ๋œ ์‘๋‹ต: {clean_text}")
228
+ return clean_text
229
+
230
+ logger.warning("โš ๏ธ ๊ฒฝ๊ณ : ์‘๋‹ต์—์„œ assistant ๋ถ€๋ถ„์„ ์ฐพ์ง€ ๋ชปํ–ˆ์Šต๋‹ˆ๋‹ค. ์ „์ฒด ํ…์ŠคํŠธ๋ฅผ ๋ฐ˜ํ™˜ํ•ฉ๋‹ˆ๋‹ค.")
231
+ logger.info(f"์ตœ์ข… ๋ฐ˜ํ™˜ ํ…์ŠคํŠธ: {full_text}")
232
+ return full_text
233
+
234
+ def get_model_info(self) -> Dict[str, Any]:
235
+ return {"model_name": self.model_name, "display_name": self.display_name, "description": self.description, "language": self.language, "model_size": self.model_size, "local_path": self.local_path, "multimodal": self.multimodal}
lily_llm_api/models/kanana_1_5_v_3b_instruct_250819_2008.py ADDED
@@ -0,0 +1,235 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Kanana-1.5-v-3b-instruct ๋ชจ๋ธ ํ”„๋กœํ•„ (๋‹จ์ˆœ ๋กœ๋”ฉ ์ตœ์ข…๋ณธ)
4
+ """
5
+ import sys
6
+ from typing import Dict, Any, Tuple
7
+ import torch
8
+ import logging
9
+ from transformers import AutoTokenizer
10
+ import os
11
+ from dotenv import load_dotenv
12
+ from pathlib import Path
13
+
14
+ load_dotenv()
15
+
16
+ HF_TOKEN = os.getenv("HF_TOKEN")
17
+
18
+ logger = logging.getLogger(__name__)
19
+ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
20
+
21
+ class Kanana15V3bInstructProfile:
22
+ """Kanana-1.5-v-3b-instruct ๋ชจ๋ธ ํ”„๋กœํ•„"""
23
+
24
+ def __init__(self):
25
+ # ํ™˜๊ฒฝ ๊ฐ์ง€
26
+ self.is_local = self._detect_local_environment()
27
+
28
+ # ๋ชจ๋ธ ๊ฒฝ๋กœ ์„ค์ •
29
+ if self.is_local:
30
+ self.model_name = "gbrabbit/lily-math-model" # ๋กœ์ปฌ์—์„œ๋„ HF ๋ชจ๋ธ๋ช… ์‚ฌ์šฉ
31
+ self.local_path = "./lily_llm_core/models/kanana_1_5_v_3b_instruct"
32
+ self.display_name = "kanana-1.5-v-3b-instruct"
33
+ else:
34
+ self.model_name = "gbrabbit/lily-math-model" # Hugging Face Hub ๋ชจ๋ธ ๊ฒฝ๋กœ
35
+ self.local_path = None # ์„œ๋ฒ„์—์„œ๋Š” ๋กœ์ปฌ ๊ฒฝ๋กœ ์‚ฌ์šฉ ์•ˆํ•จ
36
+ self.display_name = "kanana-1.5-v-3b-instruct"
37
+
38
+ self.description = "์นด์นด์˜ค ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ๋ชจ๋ธ (3.6B) - Math RAG ํŠนํ™”"
39
+ self.language = "ko"
40
+ self.model_size = "3.6B"
41
+ self.multimodal = True
42
+
43
+ def _detect_local_environment(self) -> bool:
44
+ """๋กœ์ปฌ ํ™˜๊ฒฝ์ธ์ง€ ๊ฐ์ง€"""
45
+ import os
46
+
47
+ # ๋กœ์ปฌ ํ™˜๊ฒฝ ๊ฐ์ง€ ์กฐ๊ฑด๋“ค
48
+ local_indicators = [
49
+ os.path.exists('.env'),
50
+ os.path.exists('../.env'),
51
+ os.path.exists('../../.env'),
52
+ os.getenv('IS_LOCAL') == 'true',
53
+ os.getenv('ENVIRONMENT') == 'local',
54
+ os.getenv('DOCKER_ENV') == 'local',
55
+ # Windows ๊ฒฝ๋กœ ํ™•์ธ
56
+ os.path.exists('C:/Project/lily_generate_project/lily_generate_package/.env'),
57
+ ]
58
+
59
+ is_local = any(local_indicators)
60
+ logger.info(f"๐Ÿ” ํ™˜๊ฒฝ ๊ฐ์ง€: {'๋กœ์ปฌ' if is_local else '์„œ๋ฒ„'}")
61
+ return is_local
62
+
63
+ def _load_environment_variables(self):
64
+ """ํ™˜๊ฒฝ๋ณ€์ˆ˜๋ฅผ ๋กœ๋“œํ•ฉ๋‹ˆ๋‹ค."""
65
+ import os
66
+
67
+ try:
68
+ if self.is_local:
69
+ # ๋กœ์ปฌ ํ™˜๊ฒฝ: .env ํŒŒ์ผ ๋กœ๋“œ
70
+ from dotenv import load_dotenv
71
+
72
+ # ์—ฌ๋Ÿฌ ๊ฒฝ๋กœ์—์„œ .env ํŒŒ์ผ ์ฐพ๊ธฐ
73
+ env_paths = [
74
+ '.env',
75
+ '../.env',
76
+ '../../.env',
77
+ 'C:/Project/lily_generate_project/lily_generate_package/.env',
78
+ ]
79
+
80
+ env_loaded = False
81
+ for env_path in env_paths:
82
+ if os.path.exists(env_path):
83
+ load_dotenv(env_path)
84
+ logger.info(f"โœ… ํ™˜๊ฒฝ๋ณ€์ˆ˜ ๋กœ๋“œ๋จ: {env_path}")
85
+ env_loaded = True
86
+ break
87
+
88
+ if not env_loaded:
89
+ logger.warning("โš ๏ธ .env ํŒŒ์ผ์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค")
90
+ else:
91
+ # ์„œ๋ฒ„ ํ™˜๊ฒฝ: ์‹œ์Šคํ…œ ํ™˜๊ฒฝ๋ณ€์ˆ˜ ์‚ฌ์šฉ
92
+ logger.info("๐ŸŒ ์„œ๋ฒ„ ํ™˜๊ฒฝ๋ณ€์ˆ˜ ์‚ฌ์šฉ")
93
+
94
+ except ImportError:
95
+ logger.warning("โš ๏ธ python-dotenv๊ฐ€ ์„ค์น˜๋˜์ง€ ์•Š์Œ")
96
+ except Exception as e:
97
+ logger.error(f"โŒ ํ™˜๊ฒฝ๋ณ€์ˆ˜ ๋กœ๋“œ ์‹คํŒจ: {e}")
98
+
99
+ def load_model(self) -> Tuple[Any, Any]:
100
+ """๋ชจ๋ธ ๋กœ๋“œ (๊ณต์‹์ ์ธ ๋ฐฉ๋ฒ• + ์ ˆ๋Œ€ ๊ฒฝ๋กœ sys.path ์ˆ˜์ • ์ตœ์ข…๋ณธ)"""
101
+ logger.info(f"๐Ÿ“ฅ {self.display_name} ๋ชจ๋ธ ๋กœ๋“œ ์ค‘ (๊ณต์‹ ๋ฐฉ๋ฒ•)...")
102
+
103
+ import sys
104
+ from pathlib import Path
105
+ import os # <<< os ๋ชจ๋“ˆ ์ถ”๊ฐ€
106
+
107
+ # self.local_path๋ฅผ ์ ˆ๋Œ€ ๊ฒฝ๋กœ๋กœ ๋ณ€ํ™˜
108
+ absolute_model_path = os.path.abspath(self.local_path)
109
+
110
+ use_local = Path(absolute_model_path).exists() and any(Path(absolute_model_path).iterdir())
111
+ # model_path ๋ณ€์ˆ˜์— ์ ˆ๋Œ€ ๊ฒฝ๋กœ๋ฅผ ์‚ฌ์šฉ
112
+ model_path = absolute_model_path if use_local else self.model_name
113
+
114
+ try:
115
+ from transformers import AutoModelForVision2Seq, AutoProcessor
116
+
117
+ logger.info(f"๐Ÿ” ๋ชจ๋ธ ๊ฒฝ๋กœ: {model_path} (local={'yes' if use_local else 'no'})")
118
+
119
+ if use_local and model_path not in sys.path:
120
+ logger.info(f" -> sys.path์— ์ž„์‹œ ๊ฒฝ๋กœ ์ถ”๊ฐ€: {model_path}")
121
+ sys.path.insert(0, model_path)
122
+
123
+ processor = AutoProcessor.from_pretrained(
124
+ model_path,
125
+ trust_remote_code=True,
126
+ local_files_only=use_local
127
+ )
128
+
129
+ device = 'cuda' if torch.cuda.is_available() else 'cpu'
130
+ selected_dtype = torch.float16
131
+ # CPU bfloat16 ์ง€์› ์—ฌ๋ถ€ ์ฒดํฌ ๋กœ์ง ์ˆ˜์ •
132
+ if device == 'cpu' and not (hasattr(torch.backends, 'mkl') and torch.backends.mkl.is_available() and hasattr(torch, 'float16')):
133
+ selected_dtype = torch.float16
134
+
135
+ model = AutoModelForVision2Seq.from_pretrained(
136
+ model_path,
137
+ trust_remote_code=True,
138
+ torch_dtype=selected_dtype,
139
+ local_files_only=use_local,
140
+ ).to(device)
141
+
142
+ model.eval()
143
+
144
+ logger.info(f"โœ… {self.display_name} ๋ชจ๋ธ ๋กœ๋“œ ์„ฑ๊ณต! (device={device}, dtype={selected_dtype})")
145
+ return model, processor
146
+
147
+ except Exception as e:
148
+ logger.error(f"โŒ {self.display_name} ๋ชจ๋ธ ๋กœ๋“œ ์‹คํŒจ: {e}")
149
+ raise
150
+
151
+ finally:
152
+ if use_local and model_path in sys.path:
153
+ logger.info(f" -> sys.path์—์„œ ์ž„์‹œ ๊ฒฝ๋กœ ์ œ๊ฑฐ: {model_path}")
154
+ sys.path.remove(model_path)
155
+
156
+
157
+ def get_generation_config(self) -> Dict[str, Any]:
158
+ # ๋ชจ๋ธ ํŒŒ๋ผ๋ฏธํ„ฐ ์ตœ์ ํ™” ์„ค์ •, max_new_tokens : ์ƒ์„ฑ๋˜๋Š” ํ…์ŠคํŠธ ๊ธธ์ด ์ตœ๋Œ€๊ฐ’ (์ด๋ฏธ์ง€ ์„ค๋ช…์„ ์œ„ํ•ด ์ฆ๊ฐ€)
159
+ return {"max_new_tokens": 50, "temperature": 0.7, "do_sample": True, "top_k": 40, "top_p": 0.9, "repetition_penalty": 1.1}
160
+
161
+ def extract_response(self, full_text: str, formatted_prompt: str = None, **kwargs) -> str:
162
+ """
163
+ ๋‹ค์–‘ํ•œ ์‘๋‹ต ํ˜•์‹์„ ์ฒ˜๋ฆฌํ•  ์ˆ˜ ์žˆ๋Š” ๋” ๋˜‘๋˜‘ํ•œ ์‘๋‹ต ์ถ”์ถœ ํ•จ์ˆ˜
164
+ """
165
+ logger.info(f"--- ์‘๋‹ต ์ถ”์ถœ ์‹œ์ž‘ ---")
166
+ logger.info(f"์ „์ฒด ์ƒ์„ฑ ํ…์ŠคํŠธ (Raw): \n---\n{full_text}\n---")
167
+
168
+ # ํ”„๋กฌํ”„ํŠธ๊ฐ€ ์ œ๊ณต๋œ ๊ฒฝ์šฐ ์ด๋ฅผ ์ œ๊ฑฐ
169
+ if formatted_prompt and formatted_prompt in full_text:
170
+ response = full_text.replace(formatted_prompt, "").strip()
171
+ logger.info(f"โœ… ์„ฑ๊ณต: ํ”„๋กฌํ”„ํŠธ ์ œ๊ฑฐ๋กœ ์‘๋‹ต ์ถ”์ถœ")
172
+ logger.info(f"์ถ”์ถœ๋œ ์‘๋‹ต: {response}")
173
+ if response: # ๋นˆ ๋ฌธ์ž์—ด์ด ์•„๋‹Œ ๊ฒฝ์šฐ์—๋งŒ ๋ฐ˜ํ™˜
174
+ return response
175
+
176
+ # 1์ˆœ์œ„: ๊ฐ€์žฅ ์ •ํ™•ํ•œ ํŠน์ˆ˜ ํƒœ๊ทธ๋กœ ์ถ”์ถœ ์‹œ๋„
177
+ # ์˜ˆ: <|start_header_id|>assistant<|end_header_id|>์•ˆ๋…•ํ•˜์„ธ์š”...
178
+ # ๋˜๋Š” <|im_start|>assistant์•ˆ๋…•ํ•˜์„ธ์š”...
179
+ assistant_tags = [
180
+ "<|start_header_id|>assistant<|end_header_id|>",
181
+ "<|im_start|>assistant",
182
+ "assistant\n",
183
+ "assistant:"
184
+ ]
185
+ for tag in assistant_tags:
186
+ if tag in full_text:
187
+ parts = full_text.split(tag)
188
+ if len(parts) > 1:
189
+ response = parts[-1].strip()
190
+ # ์ถ”๊ฐ€ ์ •๋ฆฌ: ํŠน์ˆ˜ ํ† ํฐ ์ œ๊ฑฐ
191
+ response = response.replace("<|im_end|>", "").strip()
192
+ logger.info(f"โœ… ์„ฑ๊ณต: '{tag}' ํƒœ๊ทธ๋กœ ์‘๋‹ต ์ถ”์ถœ")
193
+ logger.info(f"์ถ”์ถœ๋œ ์‘๋‹ต: {response}")
194
+ if response: # ๋นˆ ๋ฌธ์ž์—ด์ด ์•„๋‹Œ ๊ฒฝ์šฐ์—๋งŒ ๋ฐ˜ํ™˜
195
+ return response
196
+
197
+ # 2์ˆœ์œ„: ๊ฐ„๋‹จํ•œ ํ‚ค์›Œ๋“œ๋กœ ์ถ”์ถœ ์‹œ๋„
198
+ # ์˜ˆ: ... user ์•ˆ๋…•ํ•˜์„ธ์š” assistant ์•ˆ๋…•ํ•˜์„ธ์š” ...
199
+ if "assistant" in full_text:
200
+ parts = full_text.split("assistant")
201
+ if len(parts) > 1:
202
+ response = parts[-1].strip()
203
+ response = response.replace("<|im_end|>", "").strip()
204
+ logger.info("โœ… ์„ฑ๊ณต: 'assistant' ํ‚ค์›Œ๋“œ๋กœ ์‘๋‹ต ์ถ”์ถœ")
205
+ logger.info(f"์ถ”์ถœ๋œ ์‘๋‹ต: {response}")
206
+ if response: # ๋นˆ ๋ฌธ์ž์—ด์ด ์•„๋‹Œ ๊ฒฝ์šฐ์—๋งŒ ๋ฐ˜ํ™˜
207
+ return response
208
+
209
+ # 3์ˆœ์œ„: ํ”„๋กฌํ”„ํŠธ๊ฐ€ ์—†๋Š” ๊ฒฝ์šฐ, ์ „์ฒด ํ…์ŠคํŠธ์—์„œ ๋ถˆํ•„์š”ํ•œ ๋ถ€๋ถ„ ์ œ๊ฑฐ
210
+ clean_text = full_text.strip()
211
+ # ์ผ๋ฐ˜์ ์ธ ํ”„๋กฌํ”„ํŠธ ํŒจํ„ด ์ œ๊ฑฐ ์‹œ๋„
212
+ patterns_to_remove = [
213
+ "<|im_start|>user\n",
214
+ "<|im_end|>",
215
+ "<image>",
216
+ "user\n",
217
+ "assistant\n"
218
+ ]
219
+
220
+ for pattern in patterns_to_remove:
221
+ clean_text = clean_text.replace(pattern, "")
222
+
223
+ clean_text = clean_text.strip()
224
+
225
+ if clean_text and clean_text != full_text:
226
+ logger.info("โœ… ์„ฑ๊ณต: ํŒจํ„ด ์ œ๊ฑฐ๋กœ ์‘๋‹ต ์ •๋ฆฌ")
227
+ logger.info(f"์ •๋ฆฌ๋œ ์‘๋‹ต: {clean_text}")
228
+ return clean_text
229
+
230
+ logger.warning("โš ๏ธ ๊ฒฝ๊ณ : ์‘๋‹ต์—์„œ assistant ๋ถ€๋ถ„์„ ์ฐพ์ง€ ๋ชปํ–ˆ์Šต๋‹ˆ๋‹ค. ์ „์ฒด ํ…์ŠคํŠธ๋ฅผ ๋ฐ˜ํ™˜ํ•ฉ๋‹ˆ๋‹ค.")
231
+ logger.info(f"์ตœ์ข… ๋ฐ˜ํ™˜ ํ…์ŠคํŠธ: {full_text}")
232
+ return full_text
233
+
234
+ def get_model_info(self) -> Dict[str, Any]:
235
+ return {"model_name": self.model_name, "display_name": self.display_name, "description": self.description, "language": self.language, "model_size": self.model_size, "local_path": self.local_path, "multimodal": self.multimodal}
lily_llm_core/context_manager.py ADDED
@@ -0,0 +1,410 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ ์ปจํ…์ŠคํŠธ ๊ด€๋ฆฌ์ž (Context Manager)
4
+ ๋Œ€ํ™” ํžˆ์Šคํ† ๋ฆฌ์™€ ๋‹จ๊ธฐ ๊ธฐ์–ต์„ ๊ด€๋ฆฌํ•˜๋Š” ์‹œ์Šคํ…œ
5
+ """
6
+
7
+ import logging
8
+ import time
9
+ from typing import List, Dict, Any, Optional, Tuple
10
+ from dataclasses import dataclass
11
+ from collections import deque
12
+ import json
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+ @dataclass
17
+ class ConversationTurn:
18
+ """๋Œ€ํ™” ํ„ด์„ ๋‚˜ํƒ€๋‚ด๋Š” ๋ฐ์ดํ„ฐ ํด๋ž˜์Šค"""
19
+ role: str # 'user' ๋˜๋Š” 'assistant'
20
+ content: str
21
+ timestamp: float
22
+ message_id: str
23
+ metadata: Optional[Dict[str, Any]] = None
24
+
25
+ class ContextManager:
26
+ """๋Œ€ํ™” ์ปจํ…์ŠคํŠธ๋ฅผ ๊ด€๋ฆฌํ•˜๋Š” ํด๋ž˜์Šค"""
27
+
28
+ def __init__(self,
29
+ max_tokens: int = 4000,
30
+ max_turns: int = 20,
31
+ strategy: str = "sliding_window"):
32
+ """
33
+ Args:
34
+ max_tokens: ์ตœ๋Œ€ ํ† ํฐ ์ˆ˜
35
+ max_turns: ์ตœ๋Œ€ ๋Œ€ํ™” ํ„ด ์ˆ˜
36
+ strategy: ์ปจํ…์ŠคํŠธ ๊ด€๋ฆฌ ์ „๋žต ('sliding_window', 'priority_keep', 'circular')
37
+ """
38
+ self.max_tokens = max_tokens
39
+ self.max_turns = max_turns
40
+ self.strategy = strategy
41
+
42
+ # ๋Œ€ํ™” ํžˆ์Šคํ† ๋ฆฌ (deque ์‚ฌ์šฉ์œผ๋กœ ํšจ์œจ์ ์ธ ์–‘๋ฐฉํ–ฅ ์ ‘๊ทผ)
43
+ self.conversation_history: deque = deque(maxlen=max_turns * 2)
44
+
45
+ # ์‹œ์Šคํ…œ ํ”„๋กฌํ”„ํŠธ
46
+ self.system_prompt = ""
47
+
48
+ # ์ปจํ…์ŠคํŠธ ํ†ต๊ณ„
49
+ self.total_tokens = 0
50
+ self.current_context_length = 0
51
+
52
+ # ๋ฉ”๋ชจ๋ฆฌ ์ตœ์ ํ™” ์„ค์ •
53
+ self.enable_memory_optimization = True
54
+ self.compression_threshold = 0.8 # 80% ๋„๋‹ฌ ์‹œ ์••์ถ• ์‹œ์ž‘
55
+
56
+ logger.info(f"๐Ÿ”ง ์ปจํ…์ŠคํŠธ ๊ด€๋ฆฌ์ž ์ดˆ๊ธฐํ™”: max_tokens={max_tokens}, strategy={strategy}")
57
+
58
+ def set_system_prompt(self, prompt: str):
59
+ """์‹œ์Šคํ…œ ํ”„๋กฌํ”„ํŠธ ์„ค์ •"""
60
+ self.system_prompt = prompt
61
+ logger.info(f"๐Ÿ“ ์‹œ์Šคํ…œ ํ”„๋กฌํ”„ํŠธ ์„ค์ •: {len(prompt)} ๋ฌธ์ž")
62
+
63
+ def add_user_message(self, content: str, message_id: str = None, metadata: Dict[str, Any] = None) -> str:
64
+ """์‚ฌ์šฉ์ž ๋ฉ”์‹œ์ง€ ์ถ”๊ฐ€"""
65
+ if not message_id:
66
+ message_id = f"user_{int(time.time() * 1000)}"
67
+
68
+ turn = ConversationTurn(
69
+ role="user",
70
+ content=content,
71
+ timestamp=time.time(),
72
+ message_id=message_id,
73
+ metadata=metadata or {}
74
+ )
75
+
76
+ self.conversation_history.append(turn)
77
+ self._update_context_stats()
78
+ self._optimize_context()
79
+
80
+ logger.info(f"๐Ÿ‘ค ์‚ฌ์šฉ์ž ๋ฉ”์‹œ์ง€ ์ถ”๊ฐ€: {len(content)} ๋ฌธ์ž (์ด {len(self.conversation_history)} ํ„ด)")
81
+ return message_id
82
+
83
+ def add_assistant_message(self, content: str, message_id: str = None, metadata: Dict[str, Any] = None) -> str:
84
+ """์–ด์‹œ์Šคํ„ดํŠธ ๋ฉ”์‹œ์ง€ ์ถ”๊ฐ€"""
85
+ if not message_id:
86
+ message_id = f"assistant_{int(time.time() * 1000)}"
87
+
88
+ turn = ConversationTurn(
89
+ role="assistant",
90
+ content=content,
91
+ timestamp=time.time(),
92
+ message_id=message_id,
93
+ metadata=metadata or {}
94
+ )
95
+
96
+ self.conversation_history.append(turn)
97
+ self._update_context_stats()
98
+ self._optimize_context()
99
+
100
+ logger.info(f"๐Ÿค– ์–ด์‹œ์Šคํ„ดํŠธ ๋ฉ”์‹œ์ง€ ์ถ”๊ฐ€: {len(content)} ๋ฌธ์ž (์ด {len(self.conversation_history)} ํ„ด)")
101
+ return message_id
102
+
103
+ def get_context(self, include_system: bool = True, max_length: Optional[int] = None) -> str:
104
+ """ํ˜„์žฌ ์ปจํ…์ŠคํŠธ๋ฅผ ๋ฌธ์ž์—ด๋กœ ๋ฐ˜ํ™˜"""
105
+ context_parts = []
106
+
107
+ # ์‹œ์Šคํ…œ ํ”„๋กฌํ”„ํŠธ ํฌํ•จ
108
+ if include_system and self.system_prompt:
109
+ context_parts.append(f"<|im_start|>system\n{self.system_prompt}<|im_end|>")
110
+
111
+ # ๋Œ€ํ™” ํžˆ์Šคํ† ๋ฆฌ ํฌํ•จ
112
+ for turn in self.conversation_history:
113
+ if turn.role == "user":
114
+ context_parts.append(f"<|im_start|>user\n{turn.content}<|im_end|>")
115
+ elif turn.role == "assistant":
116
+ context_parts.append(f"<|im_start|>assistant\n{turn.content}<|im_end|>")
117
+
118
+ # ์–ด์‹œ์Šคํ„ดํŠธ ์‘๋‹ต ์‹œ์ž‘ ํ† ํฐ ์ถ”๊ฐ€
119
+ context_parts.append("<|im_start|>assistant\n")
120
+
121
+ context = "\n".join(context_parts)
122
+
123
+ # ๊ธธ์ด ์ œํ•œ ์ ์šฉ
124
+ if max_length and len(context) > max_length:
125
+ context = self._truncate_context(context, max_length)
126
+
127
+ return context
128
+
129
+ def get_context_for_model(self, model_name: str = "default") -> str:
130
+ """๋ชจ๋ธ๋ณ„ ์ตœ์ ํ™”๋œ ์ปจํ…์ŠคํŠธ ๋ฐ˜ํ™˜"""
131
+ # ๋ชจ๋ธ๋ณ„ ํŠน๋ณ„ํ•œ ์ฒ˜๋ฆฌ (ํ•„์š”์‹œ ํ™•์žฅ)
132
+ if "kanana" in model_name.lower():
133
+ return self.get_context(include_system=True)
134
+ elif "llama" in model_name.lower():
135
+ # Llama ํ˜•์‹
136
+ return self._format_for_llama()
137
+ else:
138
+ return self.get_context(include_system=True)
139
+
140
+ def _format_for_llama(self) -> str:
141
+ """Llama ๋ชจ๋ธ์šฉ ํ˜•์‹์œผ๋กœ ๋ณ€ํ™˜"""
142
+ context_parts = []
143
+
144
+ if self.system_prompt:
145
+ context_parts.append(f"[INST] {self.system_prompt} [/INST]")
146
+
147
+ for turn in self.conversation_history:
148
+ if turn.role == "user":
149
+ context_parts.append(f"[INST] {turn.content} [/INST]")
150
+ elif turn.role == "assistant":
151
+ context_parts.append(turn.content)
152
+
153
+ return "\n".join(context_parts)
154
+
155
+ def get_recent_context(self, turns: int = 5) -> str:
156
+ """์ตœ๊ทผ N๊ฐœ ํ„ด์˜ ์ปจํ…์ŠคํŠธ๋งŒ ๋ฐ˜ํ™˜"""
157
+ recent_turns = list(self.conversation_history)[-turns:]
158
+ context_parts = []
159
+
160
+ for turn in recent_turns:
161
+ if turn.role == "user":
162
+ context_parts.append(f"<|im_start|>user\n{turn.content}<|im_end|>")
163
+ elif turn.role == "assistant":
164
+ context_parts.append(f"<|im_start|>assistant\n{turn.content}<|im_end|>")
165
+
166
+ context_parts.append("<|im_start|>assistant\n")
167
+ return "\n".join(context_parts)
168
+
169
+ def get_context_summary(self) -> Dict[str, Any]:
170
+ """์ปจํ…์ŠคํŠธ ์š”์•ฝ ์ •๋ณด ๋ฐ˜ํ™˜"""
171
+ return {
172
+ "total_turns": len(self.conversation_history),
173
+ "user_messages": len([t for t in self.conversation_history if t.role == "user"]),
174
+ "assistant_messages": len([t for t in self.conversation_history if t.role == "assistant"]),
175
+ "estimated_tokens": self.total_tokens,
176
+ "context_length": self.current_context_length,
177
+ "memory_usage": len(self.conversation_history) / self.max_turns,
178
+ "oldest_message": self.conversation_history[0].timestamp if self.conversation_history else None,
179
+ "newest_message": self.conversation_history[-1].timestamp if self.conversation_history else None
180
+ }
181
+
182
+ def clear_context(self):
183
+ """์ปจํ…์ŠคํŠธ ์ดˆ๊ธฐํ™”"""
184
+ self.conversation_history.clear()
185
+ self.total_tokens = 0
186
+ self.current_context_length = 0
187
+ logger.info("๐Ÿ—‘๏ธ ์ปจํ…์ŠคํŠธ ์ดˆ๊ธฐํ™” ์™„๋ฃŒ")
188
+
189
+ def remove_message(self, message_id: str) -> bool:
190
+ """ํŠน์ • ๋ฉ”์‹œ์ง€ ์ œ๊ฑฐ"""
191
+ for i, turn in enumerate(self.conversation_history):
192
+ if turn.message_id == message_id:
193
+ removed_turn = self.conversation_history.pop(i)
194
+ self._update_context_stats()
195
+ logger.info(f"๐Ÿ—‘๏ธ ๋ฉ”์‹œ์ง€ ์ œ๊ฑฐ: {message_id}")
196
+ return True
197
+ return False
198
+
199
+ def edit_message(self, message_id: str, new_content: str) -> bool:
200
+ """๋ฉ”์‹œ์ง€ ๋‚ด์šฉ ์ˆ˜์ •"""
201
+ for turn in self.conversation_history:
202
+ if turn.message_id == message_id:
203
+ turn.content = new_content
204
+ turn.timestamp = time.time()
205
+ self._update_context_stats()
206
+ logger.info(f"โœ๏ธ ๋ฉ”์‹œ์ง€ ์ˆ˜์ •: {message_id}")
207
+ return True
208
+ return False
209
+
210
+ def search_context(self, query: str, max_results: int = 5) -> List[Dict[str, Any]]:
211
+ """์ปจํ…์ŠคํŠธ ๋‚ด์—์„œ ๊ฒ€์ƒ‰"""
212
+ results = []
213
+ query_lower = query.lower()
214
+
215
+ for turn in self.conversation_history:
216
+ if query_lower in turn.content.lower():
217
+ results.append({
218
+ "message_id": turn.message_id,
219
+ "role": turn.role,
220
+ "content": turn.content,
221
+ "timestamp": turn.timestamp,
222
+ "relevance_score": self._calculate_relevance(query, turn.content)
223
+ })
224
+
225
+ # ๊ด€๋ จ์„ฑ ์ ์ˆ˜๋กœ ์ •๋ ฌ
226
+ results.sort(key=lambda x: x["relevance_score"], reverse=True)
227
+ return results[:max_results]
228
+
229
+ def _calculate_relevance(self, query: str, content: str) -> float:
230
+ """๊ฐ„๋‹จํ•œ ๊ด€๋ จ์„ฑ ์ ์ˆ˜ ๊ณ„์‚ฐ"""
231
+ query_words = set(query.lower().split())
232
+ content_words = set(content.lower().split())
233
+
234
+ if not query_words:
235
+ return 0.0
236
+
237
+ intersection = query_words.intersection(content_words)
238
+ return len(intersection) / len(query_words)
239
+
240
+ def _update_context_stats(self):
241
+ """์ปจํ…์ŠคํŠธ ํ†ต๊ณ„ ์—…๋ฐ์ดํŠธ"""
242
+ self.current_context_length = len(self.get_context())
243
+ # ๊ฐ„๋‹จํ•œ ํ† ํฐ ์ถ”์ • (์‹ค์ œ ํ† ํฌ๋‚˜์ด์ € ์‚ฌ์šฉ ๊ถŒ์žฅ)
244
+ self.total_tokens = self.current_context_length // 4
245
+
246
+ def _optimize_context(self):
247
+ """์ปจํ…์ŠคํŠธ ์ตœ์ ํ™”"""
248
+ if not self.enable_memory_optimization:
249
+ return
250
+
251
+ # ๋ฉ”๋ชจ๋ฆฌ ์‚ฌ์šฉ๋Ÿ‰์ด ์ž„๊ณ„๊ฐ’์„ ์ดˆ๊ณผํ•˜๋ฉด ์••์ถ• ์‹œ์ž‘
252
+ if len(self.conversation_history) / self.max_turns > self.compression_threshold:
253
+ self._compress_context()
254
+
255
+ def _compress_context(self):
256
+ """์ปจํ…์ŠคํŠธ ์••์ถ• (์ค‘์š”ํ•œ ๋ฉ”์‹œ์ง€ ์œ ์ง€)"""
257
+ if len(self.conversation_history) <= self.max_turns:
258
+ return
259
+
260
+ logger.info(f"๐Ÿ—œ๏ธ ์ปจํ…์ŠคํŠธ ์••์ถ• ์‹œ์ž‘: {len(self.conversation_history)} โ†’ {self.max_turns}")
261
+
262
+ # ์ „๋žต์— ๋”ฐ๋ฅธ ์••์ถ•
263
+ if self.strategy == "sliding_window":
264
+ # ์Šฌ๋ผ์ด๋”ฉ ์œˆ๋„์šฐ: ์ตœ๊ทผ ๋ฉ”์‹œ์ง€ ์šฐ์„ 
265
+ while len(self.conversation_history) > self.max_turns:
266
+ self.conversation_history.popleft()
267
+
268
+ elif self.strategy == "priority_keep":
269
+ # ์šฐ์„ ์ˆœ์œ„ ๊ธฐ๋ฐ˜: ์‹œ์Šคํ…œ ํ”„๋กฌํ”„ํŠธ์™€ ์ตœ๊ทผ ๋ฉ”์‹œ์ง€ ์šฐ์„ 
270
+ # ์ฒซ ๋ฒˆ์งธ์™€ ๋งˆ์ง€๋ง‰ ๋ฉ”์‹œ์ง€๋Š” ์œ ์ง€
271
+ if len(self.conversation_history) > self.max_turns:
272
+ # ์ค‘๊ฐ„ ๋ฉ”์‹œ์ง€๋“ค ์ค‘ ์ผ๋ถ€ ์ œ๊ฑฐ
273
+ middle_start = self.max_turns // 2
274
+ middle_end = len(self.conversation_history) - self.max_turns // 2
275
+
276
+ # ์ค‘๊ฐ„ ๋ถ€๋ถ„์„ ์š”์•ฝ์œผ๋กœ ๋Œ€์ฒด
277
+ removed_turns = list(self.conversation_history)[middle_start:middle_end]
278
+ summary_content = f"[์ด์ „ {len(removed_turns)}๊ฐœ ๋ฉ”์‹œ์ง€ ์š”์•ฝ: {len(removed_turns)}๊ฐœ ๋Œ€ํ™” ํ„ด]"
279
+
280
+ # ์ค‘๊ฐ„ ๋ถ€๋ถ„ ์ œ๊ฑฐ
281
+ for _ in range(middle_end - middle_start):
282
+ self.conversation_history.pop(middle_start)
283
+
284
+ # ์š”์•ฝ ๋ฉ”์‹œ์ง€ ์ถ”๊ฐ€
285
+ summary_turn = ConversationTurn(
286
+ role="system",
287
+ content=summary_content,
288
+ timestamp=time.time(),
289
+ message_id=f"summary_{int(time.time() * 1000)}"
290
+ )
291
+ self.conversation_history.insert(middle_start, summary_turn)
292
+
293
+ elif self.strategy == "circular":
294
+ # ์ˆœํ™˜ ๋ฒ„ํผ: ๊ฐ€์žฅ ์˜ค๋ž˜๋œ ๋ฉ”์‹œ์ง€ ์ œ๊ฑฐ
295
+ while len(self.conversation_history) > self.max_turns:
296
+ self.conversation_history.popleft()
297
+
298
+ self._update_context_stats()
299
+ logger.info(f"โœ… ์ปจํ…์ŠคํŠธ ์••์ถ• ์™„๋ฃŒ: {len(self.conversation_history)} ํ„ด")
300
+
301
+ def _truncate_context(self, context: str, max_length: int) -> str:
302
+ """์ปจํ…์ŠคํŠธ ๊ธธ์ด ์ œํ•œ"""
303
+ if len(context) <= max_length:
304
+ return context
305
+
306
+ # ๊ฐ€์žฅ ์ตœ๊ทผ ๋ฉ”์‹œ์ง€๋ถ€ํ„ฐ ์œ ์ง€
307
+ truncated_context = context[-max_length:]
308
+
309
+ # ๋ฉ”์‹œ์ง€ ๊ฒฝ๊ณ„ ํ™•์ธ
310
+ if not truncated_context.startswith("<|im_start|>"):
311
+ # ๋ฉ”์‹œ์ง€ ๊ฒฝ๊ณ„๋ฅผ ์ฐพ์•„์„œ ์ž๋ฅด๊ธฐ
312
+ start_idx = truncated_context.find("<|im_start|>")
313
+ if start_idx != -1:
314
+ truncated_context = truncated_context[start_idx:]
315
+
316
+ return truncated_context
317
+
318
+ def export_context(self, file_path: str = None) -> str:
319
+ """์ปจํ…์ŠคํŠธ๋ฅผ ํŒŒ์ผ๋กœ ๋‚ด๋ณด๋‚ด๊ธฐ"""
320
+ if not file_path:
321
+ file_path = f"context_export_{int(time.time())}.json"
322
+
323
+ export_data = {
324
+ "export_timestamp": time.time(),
325
+ "system_prompt": self.system_prompt,
326
+ "conversation_history": [
327
+ {
328
+ "role": turn.role,
329
+ "content": turn.content,
330
+ "timestamp": turn.timestamp,
331
+ "message_id": turn.message_id,
332
+ "metadata": turn.metadata
333
+ }
334
+ for turn in self.conversation_history
335
+ ],
336
+ "context_stats": self.get_context_summary()
337
+ }
338
+
339
+ with open(file_path, 'w', encoding='utf-8') as f:
340
+ json.dump(export_data, f, ensure_ascii=False, indent=2)
341
+
342
+ logger.info(f"๐Ÿ’พ ์ปจํ…์ŠคํŠธ ๋‚ด๋ณด๋‚ด๊ธฐ ์™„๋ฃŒ: {file_path}")
343
+ return file_path
344
+
345
+ def import_context(self, file_path: str) -> bool:
346
+ """ํŒŒ์ผ์—์„œ ์ปจํ…์ŠคํŠธ ๊ฐ€์ ธ์˜ค๊ธฐ"""
347
+ try:
348
+ with open(file_path, 'r', encoding='utf-8') as f:
349
+ import_data = json.load(f)
350
+
351
+ # ๊ธฐ์กด ์ปจํ…์ŠคํŠธ ์ดˆ๊ธฐํ™”
352
+ self.clear_context()
353
+
354
+ # ์‹œ์Šคํ…œ ํ”„๋กฌํ”„ํŠธ ๋ณต์›
355
+ if "system_prompt" in import_data:
356
+ self.system_prompt = import_data["system_prompt"]
357
+
358
+ # ๋Œ€ํ™” ํžˆ์Šคํ† ๋ฆฌ ๋ณต์›
359
+ if "conversation_history" in import_data:
360
+ for turn_data in import_data["conversation_history"]:
361
+ turn = ConversationTurn(
362
+ role=turn_data["role"],
363
+ content=turn_data["content"],
364
+ timestamp=turn_data["timestamp"],
365
+ message_id=turn_data["message_id"],
366
+ metadata=turn_data.get("metadata", {})
367
+ )
368
+ self.conversation_history.append(turn)
369
+
370
+ self._update_context_stats()
371
+ logger.info(f"๐Ÿ“ฅ ์ปจํ…์ŠคํŠธ ๊ฐ€์ ธ์˜ค๊ธฐ ์™„๋ฃŒ: {file_path}")
372
+ return True
373
+
374
+ except Exception as e:
375
+ logger.error(f"โŒ ์ปจํ…์ŠคํŠธ ๊ฐ€์ ธ์˜ค๊ธฐ ์‹คํŒจ: {e}")
376
+ return False
377
+
378
+ def get_memory_efficiency(self) -> Dict[str, float]:
379
+ """๋ฉ”๋ชจ๋ฆฌ ํšจ์œจ์„ฑ ์ง€ํ‘œ ๋ฐ˜ํ™˜"""
380
+ return {
381
+ "context_utilization": len(self.conversation_history) / self.max_turns,
382
+ "token_efficiency": self.total_tokens / self.max_tokens if self.max_tokens > 0 else 0,
383
+ "compression_ratio": 1.0 - (len(self.conversation_history) / (self.max_turns * 2)),
384
+ "memory_fragmentation": self._calculate_fragmentation()
385
+ }
386
+
387
+ def _calculate_fragmentation(self) -> float:
388
+ """๋ฉ”๋ชจ๋ฆฌ ๋‹จํŽธํ™” ์ •๋„ ๊ณ„์‚ฐ"""
389
+ if len(self.conversation_history) <= 1:
390
+ return 0.0
391
+
392
+ # ์—ฐ์†๋œ ๋ฉ”์‹œ์ง€ ๊ฐ„์˜ ์‹œ๊ฐ„ ๊ฐ„๊ฒฉ์œผ๋กœ ๋‹จํŽธํ™” ๊ณ„์‚ฐ
393
+ timestamps = [turn.timestamp for turn in self.conversation_history]
394
+ intervals = [timestamps[i+1] - timestamps[i] for i in range(len(timestamps)-1)]
395
+
396
+ if not intervals:
397
+ return 0.0
398
+
399
+ avg_interval = sum(intervals) / len(intervals)
400
+ variance = sum((x - avg_interval) ** 2 for x in intervals) / len(intervals)
401
+
402
+ # ์ •๊ทœํ™”๋œ ๋‹จํŽธํ™” ์ ์ˆ˜ (0-1)
403
+ return min(1.0, variance / (avg_interval ** 2) if avg_interval > 0 else 0.0)
404
+
405
+ # ์ „์—ญ ์ปจํ…์ŠคํŠธ ๊ด€๋ฆฌ์ž ์ธ์Šคํ„ด์Šค
406
+ context_manager = ContextManager()
407
+
408
+ def get_context_manager() -> ContextManager:
409
+ """์ „์—ญ ์ปจํ…์ŠคํŠธ ๊ด€๋ฆฌ์ž ๋ฐ˜ํ™˜"""
410
+ return context_manager
lily_llm_core/document_processor.py CHANGED
@@ -90,12 +90,12 @@ class DocumentProcessor:
90
  logger.error(f"โŒ OCR ๋ฆฌ๋” ์ดˆ๊ธฐํ™” ์‹คํŒจ: {e}")
91
  self.ocr_reader = None
92
 
93
- # ์ˆ˜์‹ ์ถ”์ถœ ์—”์ง„ ์„ค์ •
94
- if formula_ocr_engine in ['mathpix', 'latexocr']:
95
  try:
96
- from .formula_extractor import image_to_latex_mathpix, run_latex_ocr
97
  self.formula_extractor_available = True
98
- logger.info(f"โœ… ์ˆ˜์‹ ์ถ”์ถœ ์—”์ง„ ์„ค์ •: {formula_ocr_engine}")
99
  except ImportError:
100
  self.formula_extractor_available = False
101
  logger.warning(f"โš ๏ธ ์ˆ˜์‹ ์ถ”์ถœ ์—”์ง„ {formula_ocr_engine} ์‚ฌ์šฉ ๋ถˆ๊ฐ€, EasyOCR๋กœ ๋Œ€์ฒด")
 
90
  logger.error(f"โŒ OCR ๋ฆฌ๋” ์ดˆ๊ธฐํ™” ์‹คํŒจ: {e}")
91
  self.ocr_reader = None
92
 
93
+ # ์ˆ˜์‹ ์ถ”์ถœ ์—”์ง„ ์„ค์ • (LaTeX-OCR ๋น„ํ™œ์„ฑํ™”๋จ)
94
+ if formula_ocr_engine in ['mathpix']: # 'latexocr' ์ œ๊ฑฐ
95
  try:
96
+ from .formula_extractor import image_to_latex_mathpix # run_latex_ocr ์ œ๊ฑฐ
97
  self.formula_extractor_available = True
98
+ logger.info(f"โœ… ์ˆ˜์‹ ์ถ”์ถœ ์—”์ง„ ์„ค์ •: {formula_ocr_engine} (LaTeX-OCR ๋น„ํ™œ์„ฑํ™”๋จ)")
99
  except ImportError:
100
  self.formula_extractor_available = False
101
  logger.warning(f"โš ๏ธ ์ˆ˜์‹ ์ถ”์ถœ ์—”์ง„ {formula_ocr_engine} ์‚ฌ์šฉ ๋ถˆ๊ฐ€, EasyOCR๋กœ ๋Œ€์ฒด")
lily_llm_core/hybrid_rag_processor.py CHANGED
@@ -14,9 +14,9 @@ from .rag_processor import rag_processor
14
  from .image_rag_processor import image_rag_processor
15
  from .latex_rag_processor import latex_rag_processor
16
 
17
- # ์ƒˆ๋กœ์šด LaTeX-OCR + FAISS ์‹œ์Šคํ…œ
18
- from latex_ocr_faiss_integrated import LatexOCRFAISSIntegrated
19
- from latex_ocr_faiss_simple import LatexOCRFAISSSimple
20
 
21
  # ๋กœ๊น… ์„ค์ •
22
  logging.basicConfig(level=logging.INFO)
@@ -31,19 +31,16 @@ class HybridRAGProcessor:
31
  self.image_rag = image_rag_processor
32
  self.latex_rag = latex_rag_processor
33
 
34
- # LaTeX-OCR + FAISS ์‹œ์Šคํ…œ
35
  self.latex_ocr_faiss_simple = None
36
  self.latex_ocr_faiss_integrated = None
37
- self._init_latex_ocr_faiss()
38
 
39
  def _init_latex_ocr_faiss(self):
40
- """LaTeX-OCR + FAISS ์‹œ์Šคํ…œ ์ดˆ๊ธฐํ™”"""
41
- try:
42
- self.latex_ocr_faiss_simple = LatexOCRFAISSSimple()
43
- self.latex_ocr_faiss_integrated = LatexOCRFAISSIntegrated()
44
- logger.info("โœ… LaTeX-OCR + FAISS ์‹œ์Šคํ…œ ์ดˆ๊ธฐํ™” ์™„๋ฃŒ")
45
- except Exception as e:
46
- logger.error(f"โŒ LaTeX-OCR + FAISS ์‹œ์Šคํ…œ ์ดˆ๊ธฐํ™” ์‹คํŒจ: {e}")
47
 
48
  def process_document_hybrid(self, file_path: str, user_id: str, document_id: str) -> Dict[str, Any]:
49
  """๋ฌธ์„œ๋ฅผ ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ๋กœ ์ฒ˜๋ฆฌ"""
@@ -92,17 +89,12 @@ class HybridRAGProcessor:
92
  logger.error(f"โŒ LaTeX ์ฒ˜๋ฆฌ ์‹คํŒจ: {e}")
93
  results["latex_processing"] = {"success": False, "error": str(e)}
94
 
95
- # 4. LaTeX-OCR + FAISS ์ฒ˜๋ฆฌ (์ƒˆ๋กœ์šด)
96
- try:
97
- if self.latex_ocr_faiss_integrated:
98
- latex_ocr_result = self.latex_ocr_faiss_integrated.process_pdf_with_latex(file_path, user_id)
99
- results["latex_ocr_faiss_processing"] = latex_ocr_result
100
- logger.info(f"โœ… LaTeX-OCR + FAISS ์ฒ˜๋ฆฌ ์™„๋ฃŒ: {latex_ocr_result.get('latex_count', 0)}๊ฐœ ์ˆ˜์‹")
101
- else:
102
- results["latex_ocr_faiss_processing"] = {"success": False, "error": "LaTeX-OCR + FAISS ์‹œ์Šคํ…œ์ด ์ดˆ๊ธฐํ™”๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค."}
103
- except Exception as e:
104
- logger.error(f"โŒ LaTeX-OCR + FAISS ์ฒ˜๋ฆฌ ์‹คํŒจ: {e}")
105
- results["latex_ocr_faiss_processing"] = {"success": False, "error": str(e)}
106
 
107
  # ์ „์ฒด ์„ฑ๊ณต ์—ฌ๋ถ€ ํŒ๋‹จ
108
  success_count = sum(1 for key, value in results.items()
@@ -123,7 +115,7 @@ class HybridRAGProcessor:
123
 
124
  def generate_hybrid_response(self, query: str, user_id: str, document_id: str,
125
  use_text: bool = True, use_image: bool = True,
126
- use_latex: bool = True, use_latex_ocr: bool = True,
127
  max_length: Optional[int] = None,
128
  temperature: Optional[float] = None,
129
  top_p: Optional[float] = None,
@@ -182,22 +174,15 @@ class HybridRAGProcessor:
182
  logger.error(f"โŒ LaTeX RAG ์‘๋‹ต ์ƒ์„ฑ ์‹คํŒจ: {e}")
183
  responses["latex"] = {"success": False, "error": str(e)}
184
 
185
- # 4. LaTeX-OCR + FAISS ์‘๋‹ต (์ƒˆ๋กœ์šด)
186
- if use_latex_ocr and self.latex_ocr_faiss_integrated:
187
- try:
188
- latex_ocr_response = self.latex_ocr_faiss_integrated.search_formulas(
189
- query, user_id, document_id, k=5
190
- )
191
- responses["latex_ocr_faiss"] = latex_ocr_response
192
- if latex_ocr_response.get("success"):
193
- context = "\n".join([f"์ˆ˜์‹: {result['formula']} (์œ ์‚ฌ๋„: {result['similarity']:.3f})"
194
- for result in latex_ocr_response.get('results', [])])
195
- all_contexts.append(f"[LaTeX-OCR+FAISS] {context}")
196
- all_sources.extend(latex_ocr_response.get('results', []))
197
- logger.info("โœ… LaTeX-OCR + FAISS ์‘๋‹ต ์ƒ์„ฑ ์™„๋ฃŒ")
198
- except Exception as e:
199
- logger.error(f"โŒ LaTeX-OCR + FAISS ์‘๋‹ต ์ƒ์„ฑ ์‹คํŒจ: {e}")
200
- responses["latex_ocr_faiss"] = {"success": False, "error": str(e)}
201
 
202
  # ํ†ตํ•ฉ ์‘๋‹ต ์ƒ์„ฑ
203
  success_count = sum(1 for response in responses.values() if response.get('success', False))
@@ -269,6 +254,12 @@ class HybridRAGProcessor:
269
  except Exception as e:
270
  logger.error(f"LaTeX ๋ฌธ์„œ ์ •๋ณด ์กฐํšŒ ์‹คํŒจ: {e}")
271
 
 
 
 
 
 
 
272
  return {
273
  "success": True,
274
  "document_id": document_id,
 
14
  from .image_rag_processor import image_rag_processor
15
  from .latex_rag_processor import latex_rag_processor
16
 
17
+ # LaTeX-OCR + FAISS ์‹œ์Šคํ…œ ๋น„ํ™œ์„ฑํ™” (ModuleNotFoundError ํ•ด๊ฒฐ)
18
+ # from latex_ocr_faiss_integrated import LatexOCRFAISSIntegrated
19
+ # from latex_ocr_faiss_simple import LatexOCRFAISSSimple
20
 
21
  # ๋กœ๊น… ์„ค์ •
22
  logging.basicConfig(level=logging.INFO)
 
31
  self.image_rag = image_rag_processor
32
  self.latex_rag = latex_rag_processor
33
 
34
+ # LaTeX-OCR + FAISS ์‹œ์Šคํ…œ ๋น„ํ™œ์„ฑํ™”
35
  self.latex_ocr_faiss_simple = None
36
  self.latex_ocr_faiss_integrated = None
37
+ # self._init_latex_ocr_faiss() # ๋น„ํ™œ์„ฑํ™”
38
 
39
  def _init_latex_ocr_faiss(self):
40
+ """LaTeX-OCR + FAISS ์‹œ์Šคํ…œ ์ดˆ๊ธฐํ™” (๋น„ํ™œ์„ฑํ™”๋จ)"""
41
+ # LaTeX-OCR ๊ธฐ๋Šฅ์ด ์™„์ „ํžˆ ๋น„ํ™œ์„ฑํ™”๋จ
42
+ logger.info("โš ๏ธ LaTeX-OCR + FAISS ์‹œ์Šคํ…œ์ด ๋น„ํ™œ์„ฑํ™”๋˜์—ˆ์Šต๋‹ˆ๋‹ค.")
43
+ pass
 
 
 
44
 
45
  def process_document_hybrid(self, file_path: str, user_id: str, document_id: str) -> Dict[str, Any]:
46
  """๋ฌธ์„œ๋ฅผ ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ๋กœ ์ฒ˜๋ฆฌ"""
 
89
  logger.error(f"โŒ LaTeX ์ฒ˜๋ฆฌ ์‹คํŒจ: {e}")
90
  results["latex_processing"] = {"success": False, "error": str(e)}
91
 
92
+ # 4. LaTeX-OCR + FAISS ์ฒ˜๋ฆฌ (๋น„ํ™œ์„ฑํ™”๋จ)
93
+ results["latex_ocr_faiss_processing"] = {
94
+ "success": False,
95
+ "error": "LaTeX-OCR + FAISS ์‹œ์Šคํ…œ์ด ๋น„ํ™œ์„ฑํ™”๋˜์—ˆ์Šต๋‹ˆ๋‹ค."
96
+ }
97
+ logger.info("โš ๏ธ LaTeX-OCR + FAISS ์ฒ˜๋ฆฌ ๊ฑด๋„ˆ๋œ€ (๋น„ํ™œ์„ฑํ™”๋จ)")
 
 
 
 
 
98
 
99
  # ์ „์ฒด ์„ฑ๊ณต ์—ฌ๋ถ€ ํŒ๋‹จ
100
  success_count = sum(1 for key, value in results.items()
 
115
 
116
  def generate_hybrid_response(self, query: str, user_id: str, document_id: str,
117
  use_text: bool = True, use_image: bool = True,
118
+ use_latex: bool = True, use_latex_ocr: bool = False, # ๊ธฐ๋ณธ๊ฐ’์„ False๋กœ ๋ณ€๊ฒฝ
119
  max_length: Optional[int] = None,
120
  temperature: Optional[float] = None,
121
  top_p: Optional[float] = None,
 
174
  logger.error(f"โŒ LaTeX RAG ์‘๋‹ต ์ƒ์„ฑ ์‹คํŒจ: {e}")
175
  responses["latex"] = {"success": False, "error": str(e)}
176
 
177
+ # 4. LaTeX-OCR + FAISS ์‘๋‹ต (๋น„ํ™œ์„ฑํ™”๋จ)
178
+ if use_latex_ocr:
179
+ logger.warning("โš ๏ธ LaTeX-OCR + FAISS ๊ธฐ๋Šฅ์ด ๋น„ํ™œ์„ฑํ™”๋˜์—ˆ์Šต๋‹ˆ๋‹ค.")
180
+ responses["latex_ocr_faiss"] = {
181
+ "success": False,
182
+ "error": "LaTeX-OCR + FAISS ์‹œ์Šคํ…œ์ด ๋น„ํ™œ์„ฑํ™”๋˜์—ˆ์Šต๋‹ˆ๋‹ค."
183
+ }
184
+ else:
185
+ logger.info("โš ๏ธ LaTeX-OCR + FAISS ์‘๋‹ต ๊ฑด๋„ˆ๋œ€ (๋น„ํ™œ์„ฑํ™”๋จ)")
 
 
 
 
 
 
 
186
 
187
  # ํ†ตํ•ฉ ์‘๋‹ต ์ƒ์„ฑ
188
  success_count = sum(1 for response in responses.values() if response.get('success', False))
 
254
  except Exception as e:
255
  logger.error(f"LaTeX ๋ฌธ์„œ ์ •๋ณด ์กฐํšŒ ์‹คํŒจ: {e}")
256
 
257
+ # LaTeX-OCR + FAISS ์ •๋ณด๋Š” ๋น„ํ™œ์„ฑํ™”๋จ
258
+ info["latex_ocr_faiss_info"] = {
259
+ "success": False,
260
+ "error": "LaTeX-OCR + FAISS ์‹œ์Šคํ…œ์ด ๋น„ํ™œ์„ฑํ™”๋˜์—ˆ์Šต๋‹ˆ๋‹ค."
261
+ }
262
+
263
  return {
264
  "success": True,
265
  "document_id": document_id,
lily_llm_core/latex_ocr_processor.py CHANGED
@@ -20,10 +20,11 @@ except ImportError:
20
  EASYOCR_AVAILABLE = False
21
  easyocr = None
22
 
23
- # LaTeX-OCR imports
24
  try:
25
- from pix2tex.cli import LatexOCR
26
- LATEXOCR_AVAILABLE = True
 
27
  except ImportError:
28
  LATEXOCR_AVAILABLE = False
29
  LatexOCR = None
 
20
  EASYOCR_AVAILABLE = False
21
  easyocr = None
22
 
23
+ # LaTeX-OCR imports (๋น„ํ™œ์„ฑํ™”๋จ)
24
  try:
25
+ # from pix2tex.cli import LatexOCR # ๋น„ํ™œ์„ฑํ™”๋จ
26
+ LATEXOCR_AVAILABLE = False
27
+ LatexOCR = None
28
  except ImportError:
29
  LATEXOCR_AVAILABLE = False
30
  LatexOCR = None
lily_llm_core/latex_ocr_subprocess.py CHANGED
@@ -1,227 +1,37 @@
1
  #!/usr/bin/env python3
2
  """
3
- LaTeX-OCR Subprocess Processor
4
  LaTeX-OCR์„ ๋ณ„๋„ ํ”„๋กœ์„ธ์Šค๋กœ ์‹คํ–‰ํ•˜์—ฌ ๋ฒ„์ „ ์ถฉ๋Œ์„ ๋ฐฉ์ง€ํ•ฉ๋‹ˆ๋‹ค.
5
  """
6
 
7
- import subprocess
8
- import json
9
- import tempfile
10
- import os
11
  import logging
12
  from typing import List, Dict, Any, Optional
13
  from PIL import Image
14
- import base64
15
- import io
16
 
17
  logger = logging.getLogger(__name__)
18
 
19
  class LaTeXOCRSubprocessProcessor:
20
- """LaTeX-OCR์„ subprocess๋กœ ์‹คํ–‰ํ•˜๋Š” ํ”„๋กœ์„ธ์„œ"""
21
 
22
  def __init__(self):
23
- self.latex_ocr_script = self._create_latex_ocr_script()
24
 
25
- def _create_latex_ocr_script(self) -> str:
26
- """LaTeX-OCR ์‹คํ–‰ ์Šคํฌ๋ฆฝํŠธ ์ƒ์„ฑ"""
27
- script_content = '''
28
- import sys
29
- import os
30
- import json
31
- from PIL import Image
32
- import io
33
-
34
- # LaTeX-OCR ํ™˜๊ฒฝ ์„ค์ •
35
- latex_ocr_env = os.path.join(os.path.dirname(__file__), 'lily_llm_utils', 'LaTeX-OCR')
36
- if latex_ocr_env not in sys.path:
37
- sys.path.insert(0, latex_ocr_env)
38
-
39
- try:
40
- from pix2tex.cli import LatexOCR
41
- import torch
42
-
43
- def process_image(image_path):
44
- """์ด๋ฏธ์ง€์—์„œ LaTeX ์ถ”์ถœ"""
45
- try:
46
- # LaTeX-OCR ๋ชจ๋ธ ์ดˆ๊ธฐํ™”
47
- model = LatexOCR()
48
-
49
- # ์ด๋ฏธ์ง€ ๋กœ๋“œ ๋ฐ ์ฒ˜๋ฆฌ
50
- img = Image.open(image_path)
51
- result = model(img)
52
-
53
- return {
54
- "success": True,
55
- "latex": result,
56
- "error": None
57
- }
58
- except Exception as e:
59
- return {
60
- "success": False,
61
- "latex": None,
62
- "error": str(e)
63
- }
64
-
65
- # ๋ช…๋ นํ–‰ ์ธ์ž ์ฒ˜๋ฆฌ
66
- if len(sys.argv) > 1:
67
- image_path = sys.argv[1]
68
- result = process_image(image_path)
69
- print(json.dumps(result, ensure_ascii=False))
70
- else:
71
- print(json.dumps({"success": False, "latex": None, "error": "No image path provided"}))
72
-
73
- except ImportError as e:
74
- print(json.dumps({"success": False, "latex": None, "error": f"Import error: {str(e)}"}))
75
- except Exception as e:
76
- print(json.dumps({"success": False, "latex": None, "error": f"Unexpected error: {str(e)}"}))
77
- '''
78
-
79
- # ์ž„์‹œ ์Šคํฌ๋ฆฝํŠธ ํŒŒ์ผ ์ƒ์„ฑ
80
- script_path = os.path.join(tempfile.gettempdir(), 'latex_ocr_processor.py')
81
- with open(script_path, 'w', encoding='utf-8') as f:
82
- f.write(script_content)
83
-
84
- return script_path
85
-
86
  def extract_latex_from_image(self, image: Image.Image) -> Optional[str]:
87
- """์ด๋ฏธ์ง€์—์„œ LaTeX ์ˆ˜์‹ ์ถ”์ถœ"""
88
- try:
89
- # ์ด๋ฏธ์ง€๋ฅผ ์ž„์‹œ ํŒŒ์ผ๋กœ ์ €์žฅ
90
- with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp_file:
91
- image.save(tmp_file.name, 'PNG')
92
- tmp_path = tmp_file.name
93
-
94
- # subprocess๋กœ LaTeX-OCR ์‹คํ–‰
95
- import sys
96
- result = subprocess.run(
97
- [sys.executable, self.latex_ocr_script, tmp_path],
98
- capture_output=True,
99
- text=True,
100
- timeout=30 # 30์ดˆ ํƒ€์ž„์•„์›ƒ
101
- )
102
-
103
- # ์ž„์‹œ ํŒŒ์ผ ์‚ญ์ œ
104
- os.unlink(tmp_path)
105
-
106
- if result.returncode == 0:
107
- try:
108
- output = json.loads(result.stdout.strip())
109
- if output.get("success"):
110
- return output.get("latex")
111
- else:
112
- logger.error(f"LaTeX-OCR ์˜ค๋ฅ˜: {output.get('error')}")
113
- return None
114
- except json.JSONDecodeError:
115
- logger.error(f"JSON ํŒŒ์‹ฑ ์˜ค๋ฅ˜: {result.stdout}")
116
- return None
117
- else:
118
- logger.error(f"LaTeX-OCR ์‹คํ–‰ ์˜ค๋ฅ˜: {result.stderr}")
119
- return None
120
-
121
- except subprocess.TimeoutExpired:
122
- logger.error("LaTeX-OCR ์‹คํ–‰ ์‹œ๊ฐ„ ์ดˆ๊ณผ")
123
- return None
124
- except Exception as e:
125
- logger.error(f"LaTeX-OCR ์ฒ˜๋ฆฌ ์˜ค๋ฅ˜: {e}")
126
- return None
127
 
128
- def extract_latex_from_pdf(self, pdf_path: str) -> Dict[str, Any]:
129
- """PDF์—์„œ LaTeX ์ˆ˜์‹ ์ถ”์ถœ"""
130
- try:
131
- import fitz # PyMuPDF
132
-
133
- doc = fitz.open(pdf_path)
134
- results = {
135
- "success": True,
136
- "pages": [],
137
- "total_pages": len(doc),
138
- "latex_count": 0
139
- }
140
-
141
- for page_num in range(len(doc)):
142
- page = doc.load_page(page_num)
143
-
144
- # ํŽ˜์ด์ง€์—์„œ ์ด๋ฏธ์ง€ ์ถ”์ถœ
145
- image_list = page.get_images()
146
- page_results = {
147
- "page_num": page_num + 1,
148
- "images": [],
149
- "latex_formulas": []
150
- }
151
-
152
- for img_index, img in enumerate(image_list):
153
- try:
154
- # ์ด๋ฏธ์ง€ ์ถ”์ถœ
155
- xref = img[0]
156
- pix = fitz.Pixmap(doc, xref)
157
-
158
- if pix.n - pix.alpha < 4: # CMYK: ์ด๋ฏธ์ง€ ๋ณ€ํ™˜
159
- pix = fitz.Pixmap(fitz.csRGB, pix)
160
-
161
- img_data = pix.tobytes("png")
162
- image = Image.open(io.BytesIO(img_data))
163
-
164
- # LaTeX ์ถ”์ถœ ์‹œ๋„
165
- latex_result = self.extract_latex_from_image(image)
166
-
167
- page_results["images"].append({
168
- "index": img_index,
169
- "size": image.size,
170
- "latex": latex_result
171
- })
172
-
173
- if latex_result:
174
- page_results["latex_formulas"].append(latex_result)
175
- results["latex_count"] += 1
176
-
177
- except Exception as e:
178
- logger.error(f"ํŽ˜์ด์ง€ {page_num + 1} ์ด๋ฏธ์ง€ {img_index} ์ฒ˜๋ฆฌ ์˜ค๋ฅ˜: {e}")
179
- continue
180
-
181
- results["pages"].append(page_results)
182
-
183
- doc.close()
184
- return results
185
-
186
- except Exception as e:
187
- logger.error(f"PDF LaTeX ์ถ”์ถœ ์˜ค๋ฅ˜: {e}")
188
- return {
189
- "success": False,
190
- "error": str(e),
191
- "pages": [],
192
- "total_pages": 0,
193
- "latex_count": 0
194
- }
195
 
196
- def process_image_file(self, image_path: str) -> Dict[str, Any]:
197
- """์ด๋ฏธ์ง€ ํŒŒ์ผ์—์„œ LaTeX ์ถ”์ถœ"""
198
- try:
199
- image = Image.open(image_path)
200
- latex_result = self.extract_latex_from_image(image)
201
-
202
- return {
203
- "success": True,
204
- "image_path": image_path,
205
- "image_size": image.size,
206
- "latex": latex_result
207
- }
208
-
209
- except Exception as e:
210
- logger.error(f"์ด๋ฏธ์ง€ ํŒŒ์ผ ์ฒ˜๋ฆฌ ์˜ค๋ฅ˜: {e}")
211
- return {
212
- "success": False,
213
- "error": str(e),
214
- "image_path": image_path,
215
- "latex": None
216
- }
217
 
218
  def cleanup(self):
219
- """๋ฆฌ์†Œ์Šค ์ •๋ฆฌ"""
220
- try:
221
- if os.path.exists(self.latex_ocr_script):
222
- os.unlink(self.latex_ocr_script)
223
- except Exception as e:
224
- logger.error(f"์Šคํฌ๋ฆฝํŠธ ์ •๋ฆฌ ์˜ค๋ฅ˜: {e}")
225
-
226
- # ์ „์—ญ ์ธ์Šคํ„ด์Šค
227
- latex_ocr_processor = LaTeXOCRSubprocessProcessor()
 
1
  #!/usr/bin/env python3
2
  """
3
+ LaTeX-OCR Subprocess Processor (๋น„ํ™œ์„ฑํ™”๋จ)
4
  LaTeX-OCR์„ ๋ณ„๋„ ํ”„๋กœ์„ธ์Šค๋กœ ์‹คํ–‰ํ•˜์—ฌ ๋ฒ„์ „ ์ถฉ๋Œ์„ ๋ฐฉ์ง€ํ•ฉ๋‹ˆ๋‹ค.
5
  """
6
 
 
 
 
 
7
  import logging
8
  from typing import List, Dict, Any, Optional
9
  from PIL import Image
 
 
10
 
11
  logger = logging.getLogger(__name__)
12
 
13
  class LaTeXOCRSubprocessProcessor:
14
+ """LaTeX-OCR์„ subprocess๋กœ ์‹คํ–‰ํ•˜๋Š” ํ”„๋กœ์„ธ์„œ (๋น„ํ™œ์„ฑํ™”๋จ)"""
15
 
16
  def __init__(self):
17
+ logger.warning("โš ๏ธ LaTeX-OCR Subprocess ๊ธฐ๋Šฅ์ด ๋น„ํ™œ์„ฑํ™”๋˜์—ˆ์Šต๋‹ˆ๋‹ค.")
18
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  def extract_latex_from_image(self, image: Image.Image) -> Optional[str]:
20
+ """์ด๋ฏธ์ง€์—์„œ LaTeX ์ˆ˜์‹ ์ถ”์ถœ (๋น„ํ™œ์„ฑํ™”๋จ)"""
21
+ logger.warning("โš ๏ธ LaTeX-OCR ์ด๋ฏธ์ง€ ์ฒ˜๋ฆฌ ๊ธฐ๋Šฅ์ด ๋น„ํ™œ์„ฑํ™”๋˜์—ˆ์Šต๋‹ˆ๋‹ค.")
22
+ return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
+ def extract_latex_from_pdf(self, pdf_path: str) -> List[Dict[str, Any]]:
25
+ """PDF์—์„œ LaTeX ์ˆ˜์‹ ์ถ”์ถœ (๋น„ํ™œ์„ฑํ™”๋จ)"""
26
+ logger.warning("โš ๏ธ LaTeX-OCR PDF ์ฒ˜๋ฆฌ ๊ธฐ๋Šฅ์ด ๋น„ํ™œ์„ฑํ™”๋˜์—ˆ์Šต๋‹ˆ๋‹ค.")
27
+ return []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
+ def process_batch_images(self, image_paths: List[str]) -> List[Dict[str, Any]]:
30
+ """๋ฐฐ์น˜ ์ด๋ฏธ์ง€ ์ฒ˜๋ฆฌ (๋น„ํ™œ์„ฑํ™”๋จ)"""
31
+ logger.warning("โš ๏ธ LaTeX-OCR ๋ฐฐ์น˜ ์ฒ˜๋ฆฌ ๊ธฐ๋Šฅ์ด ๋น„ํ™œ์„ฑํ™”๋˜์—ˆ์Šต๋‹ˆ๋‹ค.")
32
+ return []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
 
34
  def cleanup(self):
35
+ """๋ฆฌ์†Œ์Šค ์ •๋ฆฌ (๋น„ํ™œ์„ฑํ™”๋จ)"""
36
+ logger.info("โš ๏ธ LaTeX-OCR Subprocess ๋ฆฌ์†Œ์Šค ์ •๋ฆฌ (๋น„ํ™œ์„ฑํ™”๋จ)")
37
+ pass
 
 
 
 
 
 
lily_llm_core/latex_ocr_subprocess_v2.py CHANGED
@@ -1,303 +1,52 @@
1
  #!/usr/bin/env python3
2
  """
3
- LaTeX-OCR Subprocess Processor v2
4
  ๋ณ„๋„ ๊ฐ€์ƒํ™˜๊ฒฝ์„ ์‚ฌ์šฉํ•˜์—ฌ ์™„์ „ํžˆ ๊ฒฉ๋ฆฌ๋œ LaTeX-OCR ์‹คํ–‰
5
  """
6
 
7
- import subprocess
8
- import json
9
- import tempfile
10
- import os
11
  import logging
12
  from typing import List, Dict, Any, Optional
13
  from PIL import Image
14
- import base64
15
- import io
16
 
17
  logger = logging.getLogger(__name__)
18
 
19
  class LaTeXOCRSubprocessV2Processor:
20
- """๋ณ„๋„ ๊ฐ€์ƒํ™˜๊ฒฝ์„ ์‚ฌ์šฉํ•˜๋Š” LaTeX-OCR ํ”„๋กœ์„ธ์„œ"""
21
 
22
  def __init__(self, venv_path: str = None):
23
  """
24
  Args:
25
- venv_path: LaTeX-OCR ์ „์šฉ ๊ฐ€์ƒํ™˜๊ฒฝ ๊ฒฝ๋กœ
26
  """
27
- if venv_path is None:
28
- # ๊ธฐ๋ณธ ๊ฐ€์ƒํ™˜๊ฒฝ ๊ฒฝ๋กœ ์„ค์ •
29
- current_dir = os.path.dirname(os.path.abspath(__file__))
30
- venv_path = os.path.join(current_dir, '..', 'latex_ocr_env')
31
 
32
- self.venv_path = venv_path
33
- self.python_executable = self._get_venv_python()
34
- self.latex_ocr_script = self._create_latex_ocr_script()
35
-
36
- def _get_venv_python(self) -> str:
37
- """๊ฐ€์ƒํ™˜๊ฒฝ์˜ Python ์‹คํ–‰ ํŒŒ์ผ ๊ฒฝ๋กœ ๋ฐ˜ํ™˜"""
38
- if os.name == 'nt': # Windows
39
- python_path = os.path.join(self.venv_path, 'Scripts', 'python.exe')
40
- else: # Unix/Linux
41
- python_path = os.path.join(self.venv_path, 'bin', 'python')
42
-
43
- if not os.path.exists(python_path):
44
- logger.warning(f"๊ฐ€์ƒํ™˜๊ฒฝ Python์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค: {python_path}")
45
- logger.info("์‹œ์Šคํ…œ Python์„ ์‚ฌ์šฉํ•ฉ๋‹ˆ๋‹ค.")
46
- return None
47
-
48
- return python_path
49
-
50
- def _create_latex_ocr_script(self) -> str:
51
- """LaTeX-OCR ์‹คํ–‰ ์Šคํฌ๋ฆฝํŠธ ์ƒ์„ฑ"""
52
- script_content = '''
53
- import sys
54
- import os
55
- import json
56
- from PIL import Image
57
- import io
58
-
59
- try:
60
- # LaTeX-OCR ํ™˜๊ฒฝ ์„ค์ •
61
- latex_ocr_env = os.path.join(os.path.dirname(__file__), 'lily_llm_utils', 'LaTeX-OCR')
62
- if latex_ocr_env not in sys.path:
63
- sys.path.insert(0, latex_ocr_env)
64
-
65
- from pix2tex.cli import LatexOCR
66
- import torch
67
-
68
- def process_image(image_path):
69
- """์ด๋ฏธ์ง€์—์„œ LaTeX ์ถ”์ถœ"""
70
- try:
71
- # LaTeX-OCR ๋ชจ๋ธ ์ดˆ๊ธฐํ™”
72
- model = LatexOCR()
73
-
74
- # ์ด๋ฏธ์ง€ ๋กœ๋“œ ๋ฐ ์ฒ˜๋ฆฌ
75
- img = Image.open(image_path)
76
- result = model(img)
77
-
78
- return {
79
- "success": True,
80
- "latex": result,
81
- "error": None
82
- }
83
- except Exception as e:
84
- return {
85
- "success": False,
86
- "latex": None,
87
- "error": str(e)
88
- }
89
-
90
- # ๋ช…๋ นํ–‰ ์ธ์ž ์ฒ˜๋ฆฌ
91
- if len(sys.argv) > 1:
92
- image_path = sys.argv[1]
93
- result = process_image(image_path)
94
- print(json.dumps(result, ensure_ascii=False))
95
- else:
96
- print(json.dumps({"success": False, "latex": None, "error": "No image path provided"}))
97
-
98
- except ImportError as e:
99
- print(json.dumps({"success": False, "latex": None, "error": f"Import error: {str(e)}"}))
100
- except Exception as e:
101
- print(json.dumps({"success": False, "latex": None, "error": f"Unexpected error: {str(e)}"}))
102
- '''
103
-
104
- # ์ž„์‹œ ์Šคํฌ๋ฆฝํŠธ ํŒŒ์ผ ์ƒ์„ฑ
105
- script_path = os.path.join(tempfile.gettempdir(), 'latex_ocr_processor_v2.py')
106
- with open(script_path, 'w', encoding='utf-8') as f:
107
- f.write(script_content)
108
-
109
- return script_path
110
-
111
  def extract_latex_from_image(self, image: Image.Image) -> Optional[str]:
112
- """์ด๋ฏธ์ง€์—์„œ LaTeX ์ˆ˜์‹ ์ถ”์ถœ"""
113
- try:
114
- # ์ด๋ฏธ์ง€๋ฅผ ์ž„์‹œ ํŒŒ์ผ๋กœ ์ €์žฅ
115
- with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp_file:
116
- image.save(tmp_file.name, 'PNG')
117
- tmp_path = tmp_file.name
118
-
119
- # subprocess๋กœ LaTeX-OCR ์‹คํ–‰
120
- import sys
121
- cmd = [self.python_executable or sys.executable, self.latex_ocr_script, tmp_path]
122
-
123
- result = subprocess.run(
124
- cmd,
125
- capture_output=True,
126
- text=True,
127
- timeout=30 # 30์ดˆ ํƒ€์ž„์•„์›ƒ
128
- )
129
-
130
- # ์ž„์‹œ ํŒŒ์ผ ์‚ญ์ œ
131
- os.unlink(tmp_path)
132
-
133
- if result.returncode == 0:
134
- try:
135
- output = json.loads(result.stdout.strip())
136
- if output.get("success"):
137
- return output.get("latex")
138
- else:
139
- logger.error(f"LaTeX-OCR ์˜ค๋ฅ˜: {output.get('error')}")
140
- return None
141
- except json.JSONDecodeError:
142
- logger.error(f"JSON ํŒŒ์‹ฑ ์˜ค๋ฅ˜: {result.stdout}")
143
- return None
144
- else:
145
- logger.error(f"LaTeX-OCR ์‹คํ–‰ ์˜ค๋ฅ˜: {result.stderr}")
146
- return None
147
-
148
- except subprocess.TimeoutExpired:
149
- logger.error("LaTeX-OCR ์‹คํ–‰ ์‹œ๊ฐ„ ์ดˆ๊ณผ")
150
- return None
151
- except Exception as e:
152
- logger.error(f"LaTeX-OCR ์ฒ˜๋ฆฌ ์˜ค๋ฅ˜: {e}")
153
- return None
154
 
155
  def extract_latex_from_pdf(self, pdf_path: str) -> Dict[str, Any]:
156
- """PDF์—์„œ LaTeX ์ˆ˜์‹ ์ถ”์ถœ"""
157
- try:
158
- import fitz # PyMuPDF
159
-
160
- doc = fitz.open(pdf_path)
161
- results = {
162
- "success": True,
163
- "pages": [],
164
- "total_pages": len(doc),
165
- "latex_count": 0
166
- }
167
-
168
- for page_num in range(len(doc)):
169
- page = doc.load_page(page_num)
170
-
171
- # ํŽ˜์ด์ง€์—์„œ ์ด๋ฏธ์ง€ ์ถ”์ถœ
172
- image_list = page.get_images()
173
- page_results = {
174
- "page_num": page_num + 1,
175
- "images": [],
176
- "latex_formulas": []
177
- }
178
-
179
- for img_index, img in enumerate(image_list):
180
- try:
181
- # ์ด๋ฏธ์ง€ ์ถ”์ถœ
182
- xref = img[0]
183
- pix = fitz.Pixmap(doc, xref)
184
-
185
- if pix.n - pix.alpha < 4: # CMYK: ์ด๋ฏธ์ง€ ๋ณ€ํ™˜
186
- pix = fitz.Pixmap(fitz.csRGB, pix)
187
-
188
- img_data = pix.tobytes("png")
189
- image = Image.open(io.BytesIO(img_data))
190
-
191
- # LaTeX ์ถ”์ถœ ์‹œ๋„
192
- latex_result = self.extract_latex_from_image(image)
193
-
194
- page_results["images"].append({
195
- "index": img_index,
196
- "size": image.size,
197
- "latex": latex_result
198
- })
199
-
200
- if latex_result:
201
- page_results["latex_formulas"].append(latex_result)
202
- results["latex_count"] += 1
203
-
204
- except Exception as e:
205
- logger.error(f"ํŽ˜์ด์ง€ {page_num + 1} ์ด๋ฏธ์ง€ {img_index} ์ฒ˜๋ฆฌ ์˜ค๋ฅ˜: {e}")
206
- continue
207
-
208
- results["pages"].append(page_results)
209
-
210
- doc.close()
211
- return results
212
-
213
- except Exception as e:
214
- logger.error(f"PDF LaTeX ์ถ”์ถœ ์˜ค๋ฅ˜: {e}")
215
- return {
216
- "success": False,
217
- "error": str(e),
218
- "pages": [],
219
- "total_pages": 0,
220
- "latex_count": 0
221
- }
222
 
223
  def process_image_file(self, image_path: str) -> Dict[str, Any]:
224
- """์ด๋ฏธ์ง€ ํŒŒ์ผ์—์„œ LaTeX ์ถ”์ถœ"""
225
- try:
226
- image = Image.open(image_path)
227
- latex_result = self.extract_latex_from_image(image)
228
-
229
- return {
230
- "success": True,
231
- "image_path": image_path,
232
- "image_size": image.size,
233
- "latex": latex_result
234
- }
235
-
236
- except Exception as e:
237
- logger.error(f"์ด๋ฏธ์ง€ ํŒŒ์ผ ์ฒ˜๋ฆฌ ์˜ค๋ฅ˜: {e}")
238
- return {
239
- "success": False,
240
- "error": str(e),
241
- "image_path": image_path,
242
- "latex": None
243
- }
244
-
245
- def create_venv(self) -> bool:
246
- """LaTeX-OCR ์ „์šฉ ๊ฐ€์ƒํ™˜๊ฒฝ ์ƒ์„ฑ"""
247
- try:
248
- import venv
249
-
250
- if os.path.exists(self.venv_path):
251
- logger.info(f"๊ฐ€์ƒํ™˜๊ฒฝ์ด ์ด๋ฏธ ์กด์žฌํ•ฉ๋‹ˆ๋‹ค: {self.venv_path}")
252
- return True
253
-
254
- logger.info(f"LaTeX-OCR ์ „์šฉ ๊ฐ€์ƒํ™˜๊ฒฝ ์ƒ์„ฑ ์ค‘: {self.venv_path}")
255
- venv.create(self.venv_path, with_pip=True)
256
-
257
- # LaTeX-OCR ์˜์กด์„ฑ ์„ค์น˜
258
- self._install_latex_ocr_dependencies()
259
-
260
- return True
261
-
262
- except Exception as e:
263
- logger.error(f"๊ฐ€์ƒํ™˜๊ฒฝ ์ƒ์„ฑ ์˜ค๋ฅ˜: {e}")
264
- return False
265
-
266
- def _install_latex_ocr_dependencies(self):
267
- """LaTeX-OCR ์˜์กด์„ฑ ์„ค์น˜"""
268
- try:
269
- # pip ์—…๊ทธ๋ ˆ์ด๋“œ
270
- subprocess.run([
271
- self.python_executable, "-m", "pip", "install", "--upgrade", "pip"
272
- ], check=True)
273
-
274
- # LaTeX-OCR ์˜์กด์„ฑ ์„ค์น˜
275
- dependencies = [
276
- "torch==2.0.1",
277
- "transformers==4.30.0",
278
- "timm==0.6.13",
279
- "numpy==1.24.3",
280
- "Pillow",
281
- "requests"
282
- ]
283
-
284
- for dep in dependencies:
285
- subprocess.run([
286
- self.python_executable, "-m", "pip", "install", dep
287
- ], check=True)
288
-
289
- logger.info("LaTeX-OCR ์˜์กด์„ฑ ์„ค์น˜ ์™„๋ฃŒ")
290
-
291
- except Exception as e:
292
- logger.error(f"์˜์กด์„ฑ ์„ค์น˜ ์˜ค๋ฅ˜: {e}")
293
 
294
  def cleanup(self):
295
- """๋ฆฌ์†Œ์Šค ์ •๋ฆฌ"""
296
- try:
297
- if os.path.exists(self.latex_ocr_script):
298
- os.unlink(self.latex_ocr_script)
299
- except Exception as e:
300
- logger.error(f"์Šคํฌ๋ฆฝํŠธ ์ •๋ฆฌ ์˜ค๋ฅ˜: {e}")
301
-
302
- # ์ „์—ญ ์ธ์Šคํ„ด์Šค
303
- latex_ocr_processor_v2 = LaTeXOCRSubprocessV2Processor()
 
1
  #!/usr/bin/env python3
2
  """
3
+ LaTeX-OCR Subprocess Processor v2 (๋น„ํ™œ์„ฑํ™”๋จ)
4
  ๋ณ„๋„ ๊ฐ€์ƒํ™˜๊ฒฝ์„ ์‚ฌ์šฉํ•˜์—ฌ ์™„์ „ํžˆ ๊ฒฉ๋ฆฌ๋œ LaTeX-OCR ์‹คํ–‰
5
  """
6
 
 
 
 
 
7
  import logging
8
  from typing import List, Dict, Any, Optional
9
  from PIL import Image
 
 
10
 
11
  logger = logging.getLogger(__name__)
12
 
13
  class LaTeXOCRSubprocessV2Processor:
14
+ """๋ณ„๋„ ๊ฐ€์ƒํ™˜๊ฒฝ์„ ์‚ฌ์šฉํ•˜๋Š” LaTeX-OCR ํ”„๋กœ์„ธ์„œ (๋น„ํ™œ์„ฑํ™”๋จ)"""
15
 
16
  def __init__(self, venv_path: str = None):
17
  """
18
  Args:
19
+ venv_path: LaTeX-OCR ์ „์šฉ ๊ฐ€์ƒํ™˜๊ฒฝ ๊ฒฝ๋กœ (์‚ฌ์šฉ๋˜์ง€ ์•Š์Œ)
20
  """
21
+ logger.warning("โš ๏ธ LaTeX-OCR Subprocess V2 ๊ธฐ๋Šฅ์ด ๋น„ํ™œ์„ฑํ™”๋˜์—ˆ์Šต๋‹ˆ๋‹ค.")
 
 
 
22
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  def extract_latex_from_image(self, image: Image.Image) -> Optional[str]:
24
+ """์ด๋ฏธ์ง€์—์„œ LaTeX ์ˆ˜์‹ ์ถ”์ถœ (๋น„ํ™œ์„ฑํ™”๋จ)"""
25
+ logger.warning("โš ๏ธ LaTeX-OCR V2 ์ด๋ฏธ์ง€ ์ฒ˜๋ฆฌ ๊ธฐ๋Šฅ์ด ๋น„ํ™œ์„ฑํ™”๋˜์—ˆ์Šต๋‹ˆ๋‹ค.")
26
+ return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
 
28
  def extract_latex_from_pdf(self, pdf_path: str) -> Dict[str, Any]:
29
+ """PDF์—์„œ LaTeX ์ˆ˜์‹ ์ถ”์ถœ (๋น„ํ™œ์„ฑํ™”๋จ)"""
30
+ logger.warning("โš ๏ธ LaTeX-OCR V2 PDF ์ฒ˜๋ฆฌ ๊ธฐ๋Šฅ์ด ๋น„ํ™œ์„ฑํ™”๋˜์—ˆ์Šต๋‹ˆ๋‹ค.")
31
+ return {
32
+ "success": False,
33
+ "error": "LaTeX-OCR V2 ๊ธฐ๋Šฅ์ด ๋น„ํ™œ์„ฑํ™”๋˜์—ˆ์Šต๋‹ˆ๋‹ค.",
34
+ "pages": [],
35
+ "total_pages": 0,
36
+ "latex_count": 0
37
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
 
39
  def process_image_file(self, image_path: str) -> Dict[str, Any]:
40
+ """์ด๋ฏธ์ง€ ํŒŒ์ผ์—์„œ LaTeX ์ถ”์ถœ (๋น„ํ™œ์„ฑํ™”๋จ)"""
41
+ logger.warning("โš ๏ธ LaTeX-OCR V2 ์ด๋ฏธ์ง€ ํŒŒ์ผ ์ฒ˜๋ฆฌ ๊ธฐ๋Šฅ์ด ๋น„ํ™œ์„ฑํ™”๋˜์—ˆ์Šต๋‹ˆ๋‹ค.")
42
+ return {
43
+ "success": False,
44
+ "error": "LaTeX-OCR V2 ๊ธฐ๋Šฅ์ด ๋น„ํ™œ์„ฑํ™”๋˜์—ˆ์Šต๋‹ˆ๋‹ค.",
45
+ "image_path": image_path,
46
+ "latex": None
47
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
 
49
  def cleanup(self):
50
+ """๋ฆฌ์†Œ์Šค ์ •๋ฆฌ (๋น„ํ™œ์„ฑํ™”๋จ)"""
51
+ logger.info("โš ๏ธ LaTeX-OCR V2 Subprocess ๋ฆฌ์†Œ์Šค ์ •๋ฆฌ (๋น„ํ™œ์„ฑํ™”๋จ)")
52
+ pass
 
 
 
 
 
 
lily_llm_core/latex_rag_processor.py CHANGED
@@ -8,7 +8,7 @@ import logging
8
  from typing import List, Dict, Any, Optional
9
  from pathlib import Path
10
 
11
- from .latex_ocr_processor import latex_ocr_processor
12
  from .vector_store_manager import vector_store_manager
13
 
14
  logger = logging.getLogger(__name__)
@@ -24,241 +24,61 @@ class LaTeXRAGProcessor:
24
  self.max_search_results = max_search_results
25
 
26
  def process_and_store_latex_document(self, user_id: str, document_id: str, file_path: str) -> Dict[str, Any]:
27
- """LaTeX-OCR ๋ฌธ์„œ ์ฒ˜๋ฆฌ ๋ฐ ๋ฒกํ„ฐ ์Šคํ† ์–ด์— ์ €์žฅ"""
28
- try:
29
- logger.info(f"๐Ÿงฎ LaTeX-OCR ๋ฌธ์„œ ์ฒ˜๋ฆฌ ์‹œ์ž‘: {file_path}")
30
-
31
- # ํŒŒ์ผ ํ™•์žฅ์ž ํ™•์ธ
32
- file_ext = Path(file_path).suffix.lower()
33
-
34
- if file_ext == '.pdf':
35
- # PDF LaTeX-OCR ์ฒ˜๋ฆฌ
36
- documents = latex_ocr_processor.process_pdf_with_latex_ocr(file_path)
37
- elif file_ext in ['.jpg', '.jpeg', '.png', '.bmp', '.tiff']:
38
- # ์ด๋ฏธ์ง€ ํŒŒ์ผ LaTeX-OCR ์ฒ˜๋ฆฌ
39
- documents = latex_ocr_processor.process_image_file_with_latex(file_path)
40
- else:
41
- return {
42
- "success": False,
43
- "error": f"์ง€์›ํ•˜์ง€ ์•Š๋Š” ํŒŒ์ผ ํ˜•์‹์ž…๋‹ˆ๋‹ค: {file_ext}"
44
- }
45
-
46
- if not documents:
47
- return {
48
- "success": False,
49
- "error": "์ด๋ฏธ์ง€์—์„œ ํ…์ŠคํŠธ๋‚˜ ์ˆ˜์‹์„ ์ถ”์ถœํ•  ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."
50
- }
51
-
52
- # ๋ฒกํ„ฐ ์Šคํ† ์–ด์— ์ €์žฅ
53
- success = vector_store_manager.add_documents(user_id, document_id, documents)
54
-
55
- if success:
56
- # ํ†ต๊ณ„ ์ •๋ณด ๊ณ„์‚ฐ
57
- total_text_length = sum(doc.metadata.get('text_length', 0) for doc in documents)
58
- total_latex_length = sum(doc.metadata.get('latex_length', 0) for doc in documents)
59
- has_latex_count = sum(1 for doc in documents if doc.metadata.get('has_latex', False))
60
-
61
- return {
62
- "success": True,
63
- "document_id": document_id,
64
- "chunks": len(documents),
65
- "message": "LaTeX-OCR ๋ฌธ์„œ๊ฐ€ ์„ฑ๊ณต์ ์œผ๋กœ ์ฒ˜๋ฆฌ๋˜์—ˆ์Šต๋‹ˆ๋‹ค.",
66
- "processing_type": "latex_ocr",
67
- "total_text_length": total_text_length,
68
- "total_latex_length": total_latex_length,
69
- "has_latex_count": has_latex_count
70
- }
71
- else:
72
- return {
73
- "success": False,
74
- "error": "๋ฒกํ„ฐ ์Šคํ† ์–ด ์ €์žฅ์— ์‹คํŒจํ–ˆ์Šต๋‹ˆ๋‹ค."
75
- }
76
-
77
- except Exception as e:
78
- logger.error(f"โŒ LaTeX-OCR ๋ฌธ์„œ ์ฒ˜๋ฆฌ ์‹คํŒจ: {e}")
79
- return {
80
- "success": False,
81
- "error": str(e)
82
- }
83
 
84
  def generate_latex_rag_response(self, user_id: str, document_id: str, query: str) -> Dict[str, Any]:
85
- """LaTeX-OCR ๊ธฐ๋ฐ˜ RAG ์‘๋‹ต ์ƒ์„ฑ"""
86
- try:
87
- logger.info(f"๐Ÿงฎ LaTeX-OCR RAG ๊ฒ€์ƒ‰ ์‹œ์ž‘: {query}")
88
-
89
- # ์œ ์‚ฌํ•œ ๋ฌธ์„œ ๊ฒ€์ƒ‰
90
- similar_docs = vector_store_manager.search_similar(
91
- user_id, document_id, query, k=self.max_search_results
92
- )
93
-
94
- if not similar_docs:
95
- return {
96
- "success": False,
97
- "response": "๊ด€๋ จ๋œ LaTeX-OCR ๋ฌธ์„œ๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค.",
98
- "context": "",
99
- "sources": [],
100
- "search_results": 0,
101
- "processing_type": "latex_ocr"
102
- }
103
-
104
- # ์ปจํ…์ŠคํŠธ ๊ตฌ์„ฑ
105
- context = self._build_context(similar_docs)
106
-
107
- # ์†Œ์Šค ์ •๋ณด ์ถ”์ถœ
108
- sources = self._extract_sources(similar_docs)
109
-
110
- # ์‘๋‹ต ์ƒ์„ฑ
111
- response = self._generate_latex_response(query, context, similar_docs)
112
-
113
- logger.info(f"๐Ÿงฎ LaTeX-OCR RAG ์‘๋‹ต ์ƒ์„ฑ ์™„๋ฃŒ: {len(similar_docs)}๊ฐœ ๊ฒฐ๊ณผ")
114
-
115
- return {
116
- "success": True,
117
- "response": response,
118
- "context": context,
119
- "sources": sources,
120
- "search_results": len(similar_docs),
121
- "processing_type": "latex_ocr",
122
- "has_images": True,
123
- "has_latex": any(doc.metadata.get('has_latex', False) for doc in similar_docs),
124
- "image_count": len(similar_docs)
125
- }
126
-
127
- except Exception as e:
128
- logger.error(f"โŒ LaTeX-OCR RAG ์‘๋‹ต ์ƒ์„ฑ ์‹คํŒจ: {e}")
129
- return {
130
- "success": False,
131
- "response": f"LaTeX-OCR RAG ์‘๋‹ต ์ƒ์„ฑ ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {str(e)}",
132
- "context": "",
133
- "sources": [],
134
- "search_results": 0,
135
- "processing_type": "latex_ocr"
136
- }
137
 
138
  def _build_context(self, documents: List) -> str:
139
- """๋ฌธ์„œ ์ปจํ…์ŠคํŠธ ๊ตฌ์„ฑ"""
140
- if not documents:
141
- return ""
142
-
143
- context_parts = []
144
- for doc in documents:
145
- content = doc.page_content.strip()
146
- if content:
147
- # ๋ฉ”ํƒ€๋ฐ์ดํ„ฐ ์ •๋ณด ์ถ”๊ฐ€
148
- metadata_info = []
149
- if 'page' in doc.metadata:
150
- metadata_info.append(f"ํŽ˜์ด์ง€ {doc.metadata['page']}")
151
- if 'image_name' in doc.metadata:
152
- metadata_info.append(f"์ด๋ฏธ์ง€: {doc.metadata['image_name']}")
153
- if doc.metadata.get('has_latex', False):
154
- metadata_info.append("LaTeX ์ˆ˜์‹ ํฌํ•จ")
155
-
156
- if metadata_info:
157
- context_parts.append(f"[{' | '.join(metadata_info)}] {content}")
158
- else:
159
- context_parts.append(content)
160
-
161
- return "\n\n".join(context_parts)
162
 
163
  def _extract_sources(self, documents: List) -> List[Dict[str, Any]]:
164
- """์†Œ์Šค ์ •๋ณด ์ถ”์ถœ"""
165
- sources = []
166
-
167
- for doc in documents:
168
- source_info = {
169
- "content": doc.page_content[:200] + "..." if len(doc.page_content) > 200 else doc.page_content,
170
- "page": doc.metadata.get('page', 'N/A'),
171
- "image_name": doc.metadata.get('image_name', 'N/A'),
172
- "processing_type": doc.metadata.get('processing_type', 'latex_ocr'),
173
- "has_text": doc.metadata.get('has_text', False),
174
- "has_latex": doc.metadata.get('has_latex', False),
175
- "text_length": doc.metadata.get('text_length', 0),
176
- "latex_length": doc.metadata.get('latex_length', 0)
177
- }
178
- sources.append(source_info)
179
-
180
- return sources
181
 
182
  def _generate_latex_response(self, query: str, context: str, documents: List) -> str:
183
- """LaTeX-OCR ์‘๋‹ต ์ƒ์„ฑ"""
184
- if not context:
185
- return "์ด๋ฏธ์ง€์—์„œ ๊ด€๋ จ ์ •๋ณด๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."
186
-
187
- # LaTeX ์ˆ˜์‹ ํฌํ•จ ์—ฌ๋ถ€ ํ™•์ธ
188
- has_latex = any(doc.metadata.get('has_latex', False) for doc in documents)
189
-
190
- response = f"LaTeX-OCR ๋ถ„์„ ๊ฒฐ๊ณผ๋ฅผ ๋ฐ”ํƒ•์œผ๋กœ ๋‹ต๋ณ€๋“œ๋ฆฝ๋‹ˆ๋‹ค:\n\n"
191
- response += f"๐Ÿ“‹ ๊ฒ€์ƒ‰๋œ ๋‚ด์šฉ:\n{context}\n\n"
192
-
193
- if has_latex:
194
- response += f"๐Ÿงฎ ์ˆ˜ํ•™ ์ˆ˜์‹ ๋ถ„์„: ์ด๋ฏธ์ง€์—์„œ LaTeX ์ˆ˜์‹์„ ์ถ”์ถœํ•˜์—ฌ ๋ถ„์„ํ–ˆ์Šต๋‹ˆ๋‹ค.\n"
195
- response += f"๐Ÿ’ก ๋ถ„์„: ํ…์ŠคํŠธ์™€ ์ˆ˜ํ•™ ์ˆ˜์‹์„ ๋ชจ๋‘ ๊ณ ๋ คํ•œ ์ข…ํ•ฉ์ ์ธ ๋ถ„์„ ๊ฒฐ๊ณผ์ž…๋‹ˆ๋‹ค."
196
- else:
197
- response += f"๐Ÿ’ก ๋ถ„์„: ์ด๋ฏธ์ง€์—์„œ ์ถ”์ถœ๋œ ํ…์ŠคํŠธ๋ฅผ ๊ธฐ๋ฐ˜์œผ๋กœ ๊ด€๋ จ ์ •๋ณด๋ฅผ ์ œ๊ณตํ–ˆ์Šต๋‹ˆ๋‹ค."
198
-
199
- return response
200
 
201
  def get_latex_document_info(self, user_id: str, document_id: str) -> Dict[str, Any]:
202
- """LaTeX-OCR ๋ฌธ์„œ ์ •๋ณด ์กฐํšŒ"""
203
- try:
204
- store_path = vector_store_manager.get_document_store_path(user_id, document_id)
205
-
206
- if not store_path.exists():
207
- return {
208
- "success": False,
209
- "error": "๋ฌธ์„œ๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."
210
- }
211
-
212
- # ๋ฒกํ„ฐ ์Šคํ† ์–ด ๋กœ๋“œ
213
- vector_store = vector_store_manager.load_vector_store(store_path)
214
-
215
- if not vector_store:
216
- return {
217
- "success": False,
218
- "error": "๋ฒกํ„ฐ ์Šคํ† ์–ด๋ฅผ ๋กœ๋“œํ•  ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."
219
- }
220
-
221
- # ๋ฌธ์„œ ์ •๋ณด ์ˆ˜์ง‘
222
- documents = []
223
- total_text_length = 0
224
- total_latex_length = 0
225
- has_latex_count = 0
226
-
227
- for doc_id in vector_store.index_to_docstore_id:
228
- doc = vector_store.docstore._dict[doc_id]
229
- if doc.metadata.get('processing_type') == 'latex_ocr':
230
- documents.append({
231
- "page": doc.metadata.get('page', 'N/A'),
232
- "image_name": doc.metadata.get('image_name', 'N/A'),
233
- "content_preview": doc.page_content[:100] + "..." if len(doc.page_content) > 100 else doc.page_content,
234
- "has_text": doc.metadata.get('has_text', False),
235
- "has_latex": doc.metadata.get('has_latex', False),
236
- "text_length": doc.metadata.get('text_length', 0),
237
- "latex_length": doc.metadata.get('latex_length', 0)
238
- })
239
-
240
- total_text_length += doc.metadata.get('text_length', 0)
241
- total_latex_length += doc.metadata.get('latex_length', 0)
242
- if doc.metadata.get('has_latex', False):
243
- has_latex_count += 1
244
-
245
- return {
246
- "success": True,
247
- "document_id": document_id,
248
- "total_chunks": len(documents),
249
- "processing_type": "latex_ocr",
250
- "total_text_length": total_text_length,
251
- "total_latex_length": total_latex_length,
252
- "has_latex_count": has_latex_count,
253
- "documents": documents
254
- }
255
-
256
- except Exception as e:
257
- logger.error(f"โŒ LaTeX-OCR ๋ฌธ์„œ ์ •๋ณด ์กฐํšŒ ์‹คํŒจ: {e}")
258
- return {
259
- "success": False,
260
- "error": str(e)
261
- }
262
 
263
  # ์ „์—ญ ์ธ์Šคํ„ด์Šค
264
  latex_rag_processor = LaTeXRAGProcessor()
 
8
  from typing import List, Dict, Any, Optional
9
  from pathlib import Path
10
 
11
+ # from .latex_ocr_processor import latex_ocr_processor # ๋น„ํ™œ์„ฑํ™”๋จ
12
  from .vector_store_manager import vector_store_manager
13
 
14
  logger = logging.getLogger(__name__)
 
24
  self.max_search_results = max_search_results
25
 
26
  def process_and_store_latex_document(self, user_id: str, document_id: str, file_path: str) -> Dict[str, Any]:
27
+ """LaTeX-OCR ๋ฌธ์„œ ์ฒ˜๋ฆฌ ๋ฐ ๋ฒกํ„ฐ ์Šคํ† ์–ด์— ์ €์žฅ (๋น„ํ™œ์„ฑํ™”๋จ)"""
28
+ logger.warning("โš ๏ธ LaTeX-OCR ๊ธฐ๋Šฅ์ด ๋น„ํ™œ์„ฑํ™”๋˜์—ˆ์Šต๋‹ˆ๋‹ค.")
29
+ return {
30
+ "success": False,
31
+ "error": "LaTeX-OCR ๊ธฐ๋Šฅ์ด ๋น„ํ™œ์„ฑํ™”๋˜์—ˆ์Šต๋‹ˆ๋‹ค.",
32
+ "document_id": document_id,
33
+ "chunks": 0,
34
+ "message": "LaTeX-OCR ๊ธฐ๋Šฅ์ด ๋น„ํ™œ์„ฑํ™”๋˜์—ˆ์Šต๋‹ˆ๋‹ค.",
35
+ "processing_type": "latex_ocr_disabled",
36
+ "total_text_length": 0,
37
+ "total_latex_length": 0,
38
+ "has_latex_count": 0
39
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
 
41
  def generate_latex_rag_response(self, user_id: str, document_id: str, query: str) -> Dict[str, Any]:
42
+ """LaTeX-OCR ๊ธฐ๋ฐ˜ RAG ์‘๋‹ต ์ƒ์„ฑ (๋น„ํ™œ์„ฑํ™”๋จ)"""
43
+ logger.warning("โš ๏ธ LaTeX-OCR RAG ๊ธฐ๋Šฅ์ด ๋น„ํ™œ์„ฑํ™”๋˜์—ˆ์Šต๋‹ˆ๋‹ค.")
44
+ return {
45
+ "success": False,
46
+ "response": "LaTeX-OCR RAG ๊ธฐ๋Šฅ์ด ๋น„ํ™œ์„ฑํ™”๋˜์—ˆ์Šต๋‹ˆ๋‹ค.",
47
+ "context": "",
48
+ "sources": [],
49
+ "search_results": 0,
50
+ "processing_type": "latex_ocr_disabled",
51
+ "has_images": False,
52
+ "has_latex": False,
53
+ "image_count": 0
54
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
 
56
  def _build_context(self, documents: List) -> str:
57
+ """๋ฌธ์„œ ์ปจํ…์ŠคํŠธ ๊ตฌ์„ฑ (๋น„ํ™œ์„ฑํ™”๋จ)"""
58
+ return "LaTeX-OCR ๊ธฐ๋Šฅ์ด ๋น„ํ™œ์„ฑํ™”๋˜์—ˆ์Šต๋‹ˆ๋‹ค."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
 
60
  def _extract_sources(self, documents: List) -> List[Dict[str, Any]]:
61
+ """์†Œ์Šค ์ •๋ณด ์ถ”์ถœ (๋น„ํ™œ์„ฑํ™”๋จ)"""
62
+ return []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
 
64
  def _generate_latex_response(self, query: str, context: str, documents: List) -> str:
65
+ """LaTeX-OCR ์‘๋‹ต ์ƒ์„ฑ (๋น„ํ™œ์„ฑํ™”๋จ)"""
66
+ return "LaTeX-OCR ๊ธฐ๋Šฅ์ด ๋น„ํ™œ์„ฑํ™”๋˜์—ˆ์Šต๋‹ˆ๋‹ค."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
 
68
  def get_latex_document_info(self, user_id: str, document_id: str) -> Dict[str, Any]:
69
+ """LaTeX-OCR ๋ฌธ์„œ ์ •๋ณด ์กฐํšŒ (๋น„ํ™œ์„ฑํ™”๋จ)"""
70
+ logger.warning("โš ๏ธ LaTeX-OCR ๋ฌธ์„œ ์ •๋ณด ์กฐํšŒ ๊ธฐ๋Šฅ์ด ๋น„ํ™œ์„ฑํ™”๋˜์—ˆ์Šต๋‹ˆ๋‹ค.")
71
+ return {
72
+ "success": False,
73
+ "error": "LaTeX-OCR ๊ธฐ๋Šฅ์ด ๋น„ํ™œ์„ฑํ™”๋˜์—ˆ์Šต๋‹ˆ๋‹ค.",
74
+ "document_id": document_id,
75
+ "total_chunks": 0,
76
+ "processing_type": "latex_ocr_disabled",
77
+ "total_text_length": 0,
78
+ "total_latex_length": 0,
79
+ "has_latex_count": 0,
80
+ "documents": []
81
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
 
83
  # ์ „์—ญ ์ธ์Šคํ„ด์Šค
84
  latex_rag_processor = LaTeXRAGProcessor()
lily_llm_core/lora_manager.py ADDED
@@ -0,0 +1,521 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ LoRA/QLoRA ๊ด€๋ฆฌ์ž (LoRA Manager)
4
+ LoRA ์–ด๋Œ‘ํ„ฐ๋ฅผ ๋กœ๋“œํ•˜๊ณ  ๊ด€๋ฆฌํ•˜๋Š” ์‹œ์Šคํ…œ
5
+ """
6
+
7
+ import logging
8
+ import os
9
+ import json
10
+ import torch
11
+ from typing import Dict, Any, Optional, List, Union
12
+ from pathlib import Path
13
+ import warnings
14
+ import time
15
+
16
+ # logger๋ฅผ ๋จผ์ € ์ •์˜
17
+ logger = logging.getLogger(__name__)
18
+
19
+ # PEFT ๊ด€๋ จ import (์„ค์น˜๋˜์ง€ ์•Š์€ ๊ฒฝ์šฐ ๊ฒฝ๊ณ )
20
+ try:
21
+ logger.info("๐Ÿ” PEFT ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ import ์‹œ๋„ ์ค‘...")
22
+ from peft import (
23
+ LoraConfig,
24
+ get_peft_model,
25
+ PeftModel,
26
+ TaskType,
27
+ prepare_model_for_kbit_training
28
+ )
29
+ from peft.utils import get_peft_model_state_dict
30
+ PEFT_AVAILABLE = True
31
+ logger.info("โœ… PEFT ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ import ์„ฑ๊ณต")
32
+ except ImportError as e:
33
+ PEFT_AVAILABLE = False
34
+ logger.error(f"โŒ PEFT ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ import ์‹คํŒจ: {e}")
35
+ logger.error(f"โŒ Python ๊ฒฝ๋กœ: {os.environ.get('PYTHONPATH', 'Not set')}")
36
+ logger.error(f"โŒ ํ˜„์žฌ ์ž‘์—… ๋””๋ ‰ํ† ๋ฆฌ: {os.getcwd()}")
37
+ warnings.warn(f"PEFT ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ๊ฐ€ ์„ค์น˜๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค. LoRA ๊ธฐ๋Šฅ์„ ์‚ฌ์šฉํ•  ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค. ์˜ค๋ฅ˜: {e}")
38
+
39
+ # Transformers ๊ด€๋ จ import
40
+ try:
41
+ logger.info("๐Ÿ” Transformers ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ import ์‹œ๋„ ์ค‘...")
42
+ from transformers import (
43
+ AutoModelForCausalLM,
44
+ AutoTokenizer,
45
+ BitsAndBytesConfig,
46
+ TrainingArguments,
47
+ Trainer,
48
+ DataCollatorForLanguageModeling
49
+ )
50
+ TRANSFORMERS_AVAILABLE = True
51
+ logger.info("โœ… Transformers ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ import ์„ฑ๊ณต")
52
+ except ImportError as e:
53
+ TRANSFORMERS_AVAILABLE = False
54
+ logger.error(f"โŒ Transformers ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ import ์‹คํŒจ: {e}")
55
+ warnings.warn(f"Transformers ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ๊ฐ€ ์„ค์น˜๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค. ์˜ค๋ฅ˜: {e}")
56
+
57
+ class LoRAManager:
58
+ """LoRA/QLoRA ๋ชจ๋ธ ๊ด€๋ฆฌ ํด๋ž˜์Šค"""
59
+
60
+ def __init__(self, base_model_path: str = None, device: str = "auto"):
61
+ """
62
+ Args:
63
+ base_model_path: ๊ธฐ๋ณธ ๋ชจ๋ธ ๊ฒฝ๋กœ
64
+ device: ์‚ฌ์šฉํ•  ๋””๋ฐ”์ด์Šค ('auto', 'cpu', 'cuda', 'mps')
65
+ """
66
+ logger.info(f"๐Ÿ”ง LoRA ๊ด€๋ฆฌ์ž ์ดˆ๊ธฐํ™” ์‹œ์ž‘: PEFT_AVAILABLE={PEFT_AVAILABLE}, TRANSFORMERS_AVAILABLE={TRANSFORMERS_AVAILABLE}")
67
+
68
+ if not PEFT_AVAILABLE:
69
+ logger.error("โŒ PEFT ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ๋ฅผ ์‚ฌ์šฉํ•  ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค.")
70
+ logger.error("โŒ pip install peft๋ฅผ ์‹คํ–‰ํ–ˆ๋Š”์ง€ ํ™•์ธํ•˜์„ธ์š”.")
71
+ logger.error("โŒ ๊ฐ€์ƒํ™˜๊ฒฝ์ด ํ™œ์„ฑํ™”๋˜์–ด ์žˆ๋Š”์ง€ ํ™•์ธํ•˜์„ธ์š”.")
72
+ raise ImportError("PEFT ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ๊ฐ€ ํ•„์š”ํ•ฉ๋‹ˆ๋‹ค. pip install peft๋ฅผ ์‹คํ–‰ํ•˜์„ธ์š”.")
73
+
74
+ if not TRANSFORMERS_AVAILABLE:
75
+ logger.error("โŒ Transformers ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ๋ฅผ ์‚ฌ์šฉํ•  ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค.")
76
+ logger.error("โŒ pip install transformers๋ฅผ ์‹คํ–‰ํ–ˆ๋Š”์ง€ ํ™•์ธํ•˜์„ธ์š”.")
77
+ raise ImportError("Transformers ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ๊ฐ€ ํ•„์š”ํ•ฉ๋‹ˆ๋‹ค. pip install transformers๋ฅผ ์‹คํ–‰ํ•˜์„ธ์š”.")
78
+
79
+ self.base_model_path = base_model_path
80
+ self.device = self._get_device(device)
81
+
82
+ # ๋ชจ๋ธ ๋ฐ ํ† ํฌ๋‚˜์ด์ €
83
+ self.base_model = None
84
+ self.tokenizer = None
85
+ self.lora_model = None
86
+
87
+ # LoRA ์„ค์ •
88
+ self.lora_config = None
89
+ self.current_adapter_name = None
90
+
91
+ # ์–ด๋Œ‘ํ„ฐ ์ €์žฅ ๊ฒฝ๋กœ
92
+ self.adapters_dir = Path("lora_adapters")
93
+ self.adapters_dir.mkdir(exist_ok=True)
94
+
95
+ # ๋กœ๋“œ๋œ ์–ด๋Œ‘ํ„ฐ ๋ชฉ๋ก
96
+ self.loaded_adapters = {}
97
+
98
+ logger.info(f"๐Ÿ”ง LoRA ๊ด€๋ฆฌ์ž ์ดˆ๊ธฐํ™”: device={self.device}")
99
+
100
+ def _get_device(self, device: str) -> str:
101
+ """์‚ฌ์šฉ ๊ฐ€๋Šฅํ•œ ๋””๋ฐ”์ด์Šค ํ™•์ธ"""
102
+ if device == "auto":
103
+ if torch.cuda.is_available():
104
+ return "cuda"
105
+ elif torch.backends.mps.is_available():
106
+ return "mps"
107
+ else:
108
+ return "cpu"
109
+ return device
110
+
111
+ def load_base_model(self, model_path: str = None, model_type: str = "causal_lm") -> bool:
112
+ """๊ธฐ๋ณธ ๋ชจ๋ธ ๋กœ๋“œ"""
113
+ try:
114
+ model_path = model_path or self.base_model_path
115
+ if not model_path:
116
+ raise ValueError("๋ชจ๋ธ ๊ฒฝ๋กœ๊ฐ€ ์ง€์ •๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค.")
117
+
118
+ logger.info(f"๐Ÿ“ฅ ๊ธฐ๋ณธ ๋ชจ๋ธ ๋กœ๋”ฉ ์‹œ์ž‘: {model_path}")
119
+
120
+ # ํ† ํฌ๋‚˜์ด์ € ๋กœ๋“œ
121
+ self.tokenizer = AutoTokenizer.from_pretrained(
122
+ model_path,
123
+ trust_remote_code=True,
124
+ local_files_only=os.path.exists(model_path)
125
+ )
126
+
127
+ # ํŒจ๋”ฉ ํ† ํฐ ์„ค์ •
128
+ if self.tokenizer.pad_token is None:
129
+ self.tokenizer.pad_token = self.tokenizer.eos_token
130
+
131
+ # ๋ชจ๋ธ ๋กœ๋“œ
132
+ if model_type == "causal_lm":
133
+ self.base_model = AutoModelForCausalLM.from_pretrained(
134
+ model_path,
135
+ trust_remote_code=True,
136
+ local_files_only=os.path.exists(model_path),
137
+ torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,
138
+ device_map="auto" if self.device == "cuda" else None
139
+ )
140
+ else:
141
+ raise ValueError(f"์ง€์›ํ•˜์ง€ ์•Š๋Š” ๋ชจ๋ธ ํƒ€์ž…: {model_type}")
142
+
143
+ # ๋””๋ฐ”์ด์Šค๋กœ ์ด๋™
144
+ if self.device != "cuda": # cuda๋Š” device_map="auto" ์‚ฌ์šฉ
145
+ self.base_model = self.base_model.to(self.device)
146
+
147
+ self.base_model_path = model_path
148
+ logger.info(f"โœ… ๊ธฐ๋ณธ ๋ชจ๋ธ ๋กœ๋”ฉ ์™„๋ฃŒ: {model_path}")
149
+ return True
150
+
151
+ except Exception as e:
152
+ logger.error(f"โŒ ๊ธฐ๋ณธ ๋ชจ๋ธ ๋กœ๋”ฉ ์‹คํŒจ: {e}")
153
+ return False
154
+
155
+ def create_lora_config(self,
156
+ r: int = 16,
157
+ lora_alpha: int = 32,
158
+ target_modules: List[str] = None,
159
+ lora_dropout: float = 0.1,
160
+ bias: str = "none",
161
+ task_type: str = "CAUSAL_LM") -> LoraConfig:
162
+ """LoRA ์„ค์ • ์ƒ์„ฑ"""
163
+ if target_modules is None:
164
+ # ์ผ๋ฐ˜์ ์ธ ๋ชจ๋ธ ์•„ํ‚คํ…์ฒ˜์— ๋Œ€ํ•œ ๊ธฐ๋ณธ๊ฐ’
165
+ target_modules = ["q_proj", "v_proj", "k_proj", "o_proj", "gate_proj", "up_proj", "down_proj"]
166
+
167
+ # TaskType ๋ณ€ํ™˜
168
+ task_type_map = {
169
+ "CAUSAL_LM": TaskType.CAUSAL_LM,
170
+ "SEQ_2_SEQ_LM": TaskType.SEQ_2_SEQ_LM,
171
+ "SEQUENCE_CLASSIFICATION": TaskType.SEQUENCE_CLASSIFICATION,
172
+ "TOKEN_CLASSIFICATION": TaskType.TOKEN_CLASSIFICATION,
173
+ "QUESTION_ANSWERING": TaskType.QUESTION_ANSWERING
174
+ }
175
+
176
+ task_type_enum = task_type_map.get(task_type, TaskType.CAUSAL_LM)
177
+
178
+ self.lora_config = LoraConfig(
179
+ r=r,
180
+ lora_alpha=lora_alpha,
181
+ target_modules=target_modules,
182
+ lora_dropout=lora_dropout,
183
+ bias=bias,
184
+ task_type=task_type_enum
185
+ )
186
+
187
+ logger.info(f"๐Ÿ”ง LoRA ์„ค์ • ์ƒ์„ฑ: r={r}, alpha={lora_alpha}, target_modules={target_modules}")
188
+ return self.lora_config
189
+
190
+ def apply_lora_to_model(self, adapter_name: str = "default") -> bool:
191
+ """LoRA๋ฅผ ๊ธฐ๋ณธ ๋ชจ๋ธ์— ์ ์šฉ"""
192
+ try:
193
+ if self.base_model is None:
194
+ raise ValueError("๊ธฐ๋ณธ ๋ชจ๋ธ์ด ๋กœ๋“œ๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค.")
195
+
196
+ if self.lora_config is None:
197
+ raise ValueError("LoRA ์„ค์ •์ด ์ƒ์„ฑ๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค.")
198
+
199
+ logger.info(f"๐Ÿ”— LoRA ์–ด๋Œ‘ํ„ฐ ์ ์šฉ ์‹œ์ž‘: {adapter_name}")
200
+
201
+ # LoRA ๋ชจ๋ธ ์ƒ์„ฑ
202
+ self.lora_model = get_peft_model(self.base_model, self.lora_config)
203
+
204
+ # ์–ด๋Œ‘ํ„ฐ ์ด๋ฆ„ ์„ค์ •
205
+ self.current_adapter_name = adapter_name
206
+
207
+ # ํ›ˆ๋ จ ๋ชจ๋“œ๋กœ ์„ค์ •
208
+ self.lora_model.train()
209
+
210
+ # ๋ชจ๋ธ ์ •๋ณด ์ถœ๋ ฅ
211
+ self.lora_model.print_trainable_parameters()
212
+
213
+ logger.info(f"โœ… LoRA ์–ด๋Œ‘ํ„ฐ ์ ์šฉ ์™„๋ฃŒ: {adapter_name}")
214
+ return True
215
+
216
+ except Exception as e:
217
+ logger.error(f"โŒ LoRA ์–ด๋Œ‘ํ„ฐ ์ ์šฉ ์‹คํŒจ: {e}")
218
+ return False
219
+
220
+ def load_lora_adapter(self, adapter_path: str, adapter_name: str = None) -> bool:
221
+ """์ €์žฅ๋œ LoRA ์–ด๋Œ‘ํ„ฐ ๋กœ๋“œ"""
222
+ try:
223
+ if not os.path.exists(adapter_path):
224
+ raise FileNotFoundError(f"์–ด๋Œ‘ํ„ฐ ๊ฒฝ๋กœ๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค: {adapter_path}")
225
+
226
+ if adapter_name is None:
227
+ adapter_name = Path(adapter_path).stem
228
+
229
+ logger.info(f"๐Ÿ“ฅ LoRA ์–ด๋Œ‘ํ„ฐ ๋กœ๋”ฉ ์‹œ์ž‘: {adapter_path}")
230
+
231
+ # ๊ธฐ๋ณธ ๋ชจ๋ธ์ด ๋กœ๋“œ๋˜์ง€ ์•Š์€ ๊ฒฝ์šฐ ๋กœ๋“œ
232
+ if self.base_model is None:
233
+ # ์–ด๋Œ‘ํ„ฐ ์„ค์ • ํŒŒ์ผ์—์„œ ๊ธฐ๋ณธ ๋ชจ๋ธ ๊ฒฝ๋กœ ํ™•์ธ
234
+ config_path = os.path.join(adapter_path, "adapter_config.json")
235
+ if os.path.exists(config_path):
236
+ with open(config_path, 'r') as f:
237
+ config = json.load(f)
238
+ base_model_path = config.get("base_model_name_or_path")
239
+ if base_model_path:
240
+ self.load_base_model(base_model_path)
241
+
242
+ # LoRA ์–ด๋Œ‘ํ„ฐ ๋กœ๋“œ
243
+ self.lora_model = PeftModel.from_pretrained(
244
+ self.base_model,
245
+ adapter_path,
246
+ torch_dtype=torch.float16 if self.device == "cuda" else torch.float32
247
+ )
248
+
249
+ # ๋””๋ฐ”์ด์Šค๋กœ ์ด๋™
250
+ if self.device != "cuda":
251
+ self.lora_model = self.lora_model.to(self.device)
252
+
253
+ self.current_adapter_name = adapter_name
254
+ self.loaded_adapters[adapter_name] = adapter_path
255
+
256
+ logger.info(f"โœ… LoRA ์–ด๋Œ‘ํ„ฐ ๋กœ๋”ฉ ์™„๋ฃŒ: {adapter_name}")
257
+ return True
258
+
259
+ except Exception as e:
260
+ logger.error(f"โŒ LoRA ์–ด๋Œ‘ํ„ฐ ๋กœ๋”ฉ ์‹คํŒจ: {e}")
261
+ return False
262
+
263
+ def save_lora_adapter(self, adapter_name: str = None, output_dir: str = None) -> bool:
264
+ """LoRA ์–ด๋Œ‘ํ„ฐ ์ €์žฅ"""
265
+ try:
266
+ if self.lora_model is None:
267
+ raise ValueError("LoRA ๋ชจ๋ธ์ด ๋กœ๋“œ๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค.")
268
+
269
+ adapter_name = adapter_name or self.current_adapter_name or "default"
270
+ output_dir = output_dir or str(self.adapters_dir / adapter_name)
271
+
272
+ logger.info(f"๐Ÿ’พ LoRA ์–ด๋Œ‘ํ„ฐ ์ €์žฅ ์‹œ์ž‘: {adapter_name} -> {output_dir}")
273
+
274
+ # ์–ด๋Œ‘ํ„ฐ ์ €์žฅ
275
+ self.lora_model.save_pretrained(output_dir)
276
+
277
+ # ํ† ํฌ๋‚˜์ด์ €๋„ ์ €์žฅ
278
+ if self.tokenizer:
279
+ self.tokenizer.save_pretrained(output_dir)
280
+
281
+ # ์–ด๋Œ‘ํ„ฐ ์ •๋ณด ์ €์žฅ
282
+ adapter_info = {
283
+ "adapter_name": adapter_name,
284
+ "base_model": self.base_model_path,
285
+ "lora_config": self.lora_config.to_dict() if self.lora_config else None,
286
+ "created_at": str(torch.tensor(time.time())),
287
+ "device": self.device
288
+ }
289
+
290
+ with open(os.path.join(output_dir, "adapter_info.json"), 'w') as f:
291
+ json.dump(adapter_info, f, indent=2)
292
+
293
+ logger.info(f"โœ… LoRA ์–ด๋Œ‘ํ„ฐ ์ €์žฅ ์™„๋ฃŒ: {output_dir}")
294
+ return True
295
+
296
+ except Exception as e:
297
+ logger.error(f"โŒ LoRA ์–ด๋Œ‘ํ„ฐ ์ €์žฅ ์‹คํŒจ: {e}")
298
+ return False
299
+
300
+ def merge_lora_with_base(self, output_path: str = None) -> bool:
301
+ """LoRA ์–ด๋Œ‘ํ„ฐ๋ฅผ ๊ธฐ๋ณธ ๋ชจ๋ธ๊ณผ ๋ณ‘ํ•ฉ"""
302
+ try:
303
+ if self.lora_model is None:
304
+ raise ValueError("LoRA ๋ชจ๋ธ์ด ๋กœ๋“œ๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค.")
305
+
306
+ output_path = output_path or f"{self.base_model_path}_merged"
307
+
308
+ logger.info(f"๐Ÿ”— LoRA ์–ด๋Œ‘ํ„ฐ ๋ณ‘ํ•ฉ ์‹œ์ž‘: {output_path}")
309
+
310
+ # ๋ณ‘ํ•ฉ๋œ ๋ชจ๋ธ ์ƒ์„ฑ
311
+ merged_model = self.lora_model.merge_and_unload()
312
+
313
+ # ๋ณ‘ํ•ฉ๋œ ๋ชจ๋ธ ์ €์žฅ
314
+ merged_model.save_pretrained(output_path)
315
+
316
+ # ํ† ํฌ๋‚˜์ด์ €๋„ ์ €์žฅ
317
+ if self.tokenizer:
318
+ self.tokenizer.save_pretrained(output_path)
319
+
320
+ logger.info(f"โœ… LoRA ์–ด๋Œ‘ํ„ฐ ๋ณ‘ํ•ฉ ์™„๋ฃŒ: {output_path}")
321
+ return True
322
+
323
+ except Exception as e:
324
+ logger.error(f"โŒ LoRA ์–ด๋Œ‘ํ„ฐ ๋ณ‘ํ•ฉ ์‹คํŒจ: {e}")
325
+ return False
326
+
327
+ def list_available_adapters(self) -> List[Dict[str, Any]]:
328
+ """์‚ฌ์šฉ ๊ฐ€๋Šฅํ•œ ์–ด๋Œ‘ํ„ฐ ๋ชฉ๋ก ๋ฐ˜ํ™˜"""
329
+ adapters = []
330
+
331
+ for adapter_dir in self.adapters_dir.iterdir():
332
+ if adapter_dir.is_dir():
333
+ config_path = adapter_dir / "adapter_config.json"
334
+ info_path = adapter_dir / "adapter_info.json"
335
+
336
+ adapter_info = {
337
+ "name": adapter_dir.name,
338
+ "path": str(adapter_dir),
339
+ "config_exists": config_path.exists(),
340
+ "info_exists": info_path.exists()
341
+ }
342
+
343
+ # ์–ด๋Œ‘ํ„ฐ ์ •๋ณด ๋กœ๋“œ
344
+ if info_path.exists():
345
+ try:
346
+ with open(info_path, 'r') as f:
347
+ info = json.load(f)
348
+ adapter_info.update(info)
349
+ except Exception as e:
350
+ logger.warning(f"์–ด๋Œ‘ํ„ฐ ์ •๋ณด ๋กœ๋“œ ์‹คํŒจ: {e}")
351
+
352
+ adapters.append(adapter_info)
353
+
354
+ return adapters
355
+
356
+ def get_adapter_stats(self) -> Dict[str, Any]:
357
+ """์–ด๋Œ‘ํ„ฐ ํ†ต๊ณ„ ์ •๋ณด ๋ฐ˜ํ™˜"""
358
+ if self.lora_model is None:
359
+ return {"error": "LoRA ๋ชจ๋ธ์ด ๋กœ๋“œ๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค."}
360
+
361
+ try:
362
+ # ํ›ˆ๋ จ ๊ฐ€๋Šฅํ•œ ํŒŒ๋ผ๋ฏธํ„ฐ ์ˆ˜
363
+ trainable_params = 0
364
+ all_param = 0
365
+
366
+ for param in self.lora_model.parameters():
367
+ all_param += param.numel()
368
+ if param.requires_grad:
369
+ trainable_params += param.numel()
370
+
371
+ return {
372
+ "adapter_name": self.current_adapter_name,
373
+ "trainable_params": trainable_params,
374
+ "all_params": all_param,
375
+ "trainable_ratio": trainable_params / all_param if all_param > 0 else 0,
376
+ "device": self.device,
377
+ "model_type": type(self.lora_model).__name__
378
+ }
379
+
380
+ except Exception as e:
381
+ logger.error(f"์–ด๋Œ‘ํ„ฐ ํ†ต๊ณ„ ์ˆ˜์ง‘ ์‹คํŒจ: {e}")
382
+ return {"error": str(e)}
383
+
384
+ def switch_adapter(self, adapter_name: str) -> bool:
385
+ """๋‹ค๋ฅธ ์–ด๋Œ‘ํ„ฐ๋กœ ์ „ํ™˜"""
386
+ try:
387
+ if adapter_name not in self.loaded_adapters:
388
+ # ์–ด๋Œ‘ํ„ฐ ๋กœ๋“œ
389
+ adapter_path = self.adapters_dir / adapter_name
390
+ if not adapter_path.exists():
391
+ raise FileNotFoundError(f"์–ด๋Œ‘ํ„ฐ๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค: {adapter_name}")
392
+
393
+ return self.load_lora_adapter(str(adapter_path), adapter_name)
394
+ else:
395
+ # ์ด๋ฏธ ๋กœ๋“œ๋œ ์–ด๋Œ‘ํ„ฐ ์‚ฌ์šฉ
396
+ self.current_adapter_name = adapter_name
397
+ logger.info(f"๐Ÿ”„ ์–ด๋Œ‘ํ„ฐ ์ „ํ™˜: {adapter_name}")
398
+ return True
399
+
400
+ except Exception as e:
401
+ logger.error(f"โŒ ์–ด๋Œ‘ํ„ฐ ์ „ํ™˜ ์‹คํŒจ: {e}")
402
+ return False
403
+
404
+ def unload_adapter(self) -> bool:
405
+ """LoRA ์–ด๋Œ‘ํ„ฐ ์–ธ๋กœ๋“œ"""
406
+ try:
407
+ if self.lora_model is None:
408
+ return True
409
+
410
+ logger.info("๐Ÿ—‘๏ธ LoRA ์–ด๋Œ‘ํ„ฐ ์–ธ๋กœ๋“œ ์‹œ์ž‘")
411
+
412
+ # ์–ด๋Œ‘ํ„ฐ ์ œ๊ฑฐ
413
+ self.lora_model = None
414
+ self.current_adapter_name = None
415
+ self.lora_config = None
416
+
417
+ logger.info("โœ… LoRA ์–ด๋Œ‘ํ„ฐ ์–ธ๋กœ๋“œ ์™„๋ฃŒ")
418
+ return True
419
+
420
+ except Exception as e:
421
+ logger.error(f"โŒ LoRA ์–ด๋Œ‘ํ„ฐ ์–ธ๋กœ๋“œ ์‹คํŒจ: {e}")
422
+ return False
423
+
424
+ def generate_text(self, prompt: str, max_length: int = 100, temperature: float = 0.7) -> str:
425
+ """LoRA ๋ชจ๋ธ์„ ์‚ฌ์šฉํ•œ ํ…์ŠคํŠธ ์ƒ์„ฑ"""
426
+ try:
427
+ if self.lora_model is None:
428
+ raise ValueError("LoRA ๋ชจ๋ธ์ด ๋กœ๋“œ๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค.")
429
+
430
+ if self.tokenizer is None:
431
+ raise ValueError("ํ† ํฌ๋‚˜์ด์ €๊ฐ€ ๋กœ๋“œ๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค.")
432
+
433
+ # ์ž…๋ ฅ ํ† ํฌ๋‚˜์ด์ง•
434
+ inputs = self.tokenizer(prompt, return_tensors="pt")
435
+ inputs = {k: v.to(self.device) for k, v in inputs.items()}
436
+
437
+ # ์ถ”๋ก  ๋ชจ๋“œ๋กœ ์„ค์ •
438
+ self.lora_model.eval()
439
+
440
+ with torch.no_grad():
441
+ outputs = self.lora_model.generate(
442
+ **inputs,
443
+ max_new_tokens=max_length,
444
+ temperature=temperature,
445
+ do_sample=True,
446
+ pad_token_id=self.tokenizer.eos_token_id
447
+ )
448
+
449
+ # ์‘๋‹ต ๋””์ฝ”๋”ฉ
450
+ response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
451
+
452
+ # ํ”„๋กฌํ”„ํŠธ ์ œ๊ฑฐ
453
+ if response.startswith(prompt):
454
+ response = response[len(prompt):].strip()
455
+
456
+ return response
457
+
458
+ except Exception as e:
459
+ logger.error(f"โŒ ํ…์ŠคํŠธ ์ƒ์„ฑ ์‹คํŒจ: {e}")
460
+ return f"ํ…์ŠคํŠธ ์ƒ์„ฑ ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {str(e)}"
461
+
462
+ def prepare_for_training(self, training_args: TrainingArguments = None) -> bool:
463
+ """ํ›ˆ๋ จ์„ ์œ„ํ•œ ๋ชจ๋ธ ์ค€๋น„"""
464
+ try:
465
+ if self.lora_model is None:
466
+ raise ValueError("LoRA ๋ชจ๋ธ์ด ๋กœ๋“œ๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค.")
467
+
468
+ logger.info("๐Ÿ”ง ํ›ˆ๋ จ์„ ์œ„ํ•œ ๋ชจ๋ธ ์ค€๋น„ ์‹œ์ž‘")
469
+
470
+ # ๊ธฐ๋ณธ ํ›ˆ๋ จ ์ธ์ˆ˜
471
+ if training_args is None:
472
+ training_args = TrainingArguments(
473
+ output_dir="./lora_training_output",
474
+ num_train_epochs=3,
475
+ per_device_train_batch_size=4,
476
+ gradient_accumulation_steps=4,
477
+ learning_rate=2e-4,
478
+ warmup_steps=100,
479
+ logging_steps=10,
480
+ save_steps=500,
481
+ eval_steps=500,
482
+ evaluation_strategy="steps",
483
+ save_strategy="steps",
484
+ load_best_model_at_end=True,
485
+ metric_for_best_model="eval_loss",
486
+ greater_is_better=False,
487
+ fp16=torch.cuda.is_available(),
488
+ dataloader_pin_memory=False,
489
+ )
490
+
491
+ # ํ›ˆ๋ จ ๋ชจ๋“œ๋กœ ์„ค์ •
492
+ self.lora_model.train()
493
+
494
+ # ๊ทธ๋ž˜๋””์–ธํŠธ ์ฒดํฌํฌ์ธํŒ… ํ™œ์„ฑํ™” (๋ฉ”๋ชจ๋ฆฌ ์ ˆ์•ฝ)
495
+ self.lora_model.gradient_checkpointing_enable()
496
+
497
+ # ๊ทธ๋ž˜๋””์–ธํŠธ ํด๋ฆฌํ•‘ ์„ค์ •
498
+ self.lora_model.enable_input_require_grads()
499
+
500
+ logger.info("โœ… ํ›ˆ๋ จ๏ฟฝ๏ฟฝ ์œ„ํ•œ ๋ชจ๋ธ ์ค€๋น„ ์™„๋ฃŒ")
501
+ return True
502
+
503
+ except Exception as e:
504
+ logger.error(f"โŒ ํ›ˆ๋ จ ์ค€๋น„ ์‹คํŒจ: {e}")
505
+ return False
506
+
507
# Module-level singleton: the shared LoRA manager is created only when both
# optional dependency stacks imported successfully; any failure degrades to
# None instead of breaking module import.
def _build_global_lora_manager() -> Optional["LoRAManager"]:
    """Construct the shared LoRAManager, returning None when unavailable."""
    try:
        if PEFT_AVAILABLE and TRANSFORMERS_AVAILABLE:
            manager = LoRAManager()
            logger.info("โœ… ์ „์—ญ LoRA ๊ด€๋ฆฌ์ž ์ธ์Šคํ„ด์Šค ์ƒ์„ฑ ์™„๋ฃŒ")
            return manager
        logger.warning("โš ๏ธ LoRA ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ๊ฐ€ ์‚ฌ์šฉ ๋ถˆ๊ฐ€๋Šฅํ•˜์—ฌ LoRA ๊ด€๋ฆฌ์ž๋ฅผ ์ƒ์„ฑํ•˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค.")
        return None
    except Exception as e:
        logger.error(f"โŒ LoRA ๊ด€๋ฆฌ์ž ์ธ์Šคํ„ด์Šค ์ƒ์„ฑ ์‹คํŒจ: {e}")
        return None


lora_manager = _build_global_lora_manager()
518
+
519
def get_lora_manager() -> Optional[LoRAManager]:
    """Accessor for the module-level LoRA manager.

    Returns:
        The shared LoRAManager instance, or None when the LoRA libraries were
        unavailable or construction failed at import time.
    """
    return lora_manager
refresh_tokenizer.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from transformers import AutoTokenizer
import os

# Hugging Face Hub id of the model whose tokenizer is re-downloaded.
# model_name = "EleutherAI/polyglot-ko-1.3b"
# model_name = "EleutherAI/polyglot-ko-5.8b"
model_name = "kakaocorp/kanana-1.5-v-3b-instruct"

# Local directory whose tokenizer files will be overwritten.
# save_directory = "./lily_llm_core/models/polyglot_ko_1_3b_chat"
# save_directory = "./lily_llm_core/models/polyglot_ko_5_8b_chat"
save_directory = "./lily_llm_core/models/kanana_1_5_v_3b_instruct"

print(f"'{model_name}' ๋ชจ๋ธ์˜ ํ† ํฌ๋‚˜์ด์ €๋ฅผ ๋‹ค์šด๋กœ๋“œํ•ฉ๋‹ˆ๋‹ค...")

try:
    # Download the tokenizer from the Hub (uses cached credentials if any).
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    print("โœ… ํ† ํฌ๋‚˜์ด์ € ๋‹ค์šด๋กœ๋“œ ์„ฑ๊ณต!")

    # FIX: exist_ok=True removes the check-then-create race; the creation
    # message is still only printed when the directory was actually missing.
    if not os.path.exists(save_directory):
        os.makedirs(save_directory, exist_ok=True)
        print(f"'{save_directory}' ํด๋”๋ฅผ ์ƒ์„ฑํ–ˆ์Šต๋‹ˆ๋‹ค.")

    # Re-save in the current library's format, overwriting stale files.
    tokenizer.save_pretrained(save_directory)
    print(f"โœ… ํ† ํฌ๋‚˜์ด์ €๋ฅผ '{save_directory}' ๊ฒฝ๋กœ์— ์„ฑ๊ณต์ ์œผ๋กœ ์ €์žฅํ–ˆ์Šต๋‹ˆ๋‹ค.")
    print("\n์ด์ œ Lily LLM ์„œ๋ฒ„๋ฅผ ๋‹ค์‹œ ์‹คํ–‰ํ•ด ์ฃผ์„ธ์š”.")

except Exception as e:
    print(f"\nโŒ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {e}")
    print("๋ชจ๋ธ ์ด๋ฆ„์„ ํ™•์ธํ•˜๊ฑฐ๋‚˜ ์ธํ„ฐ๋„ท ์—ฐ๊ฒฐ ์ƒํƒœ๋ฅผ ์ ๊ฒ€ํ•ด ์ฃผ์„ธ์š”.")
33
+
34
+
35
+ # from transformers import AutoTokenizer
36
+ # import os
37
+
38
+ # # 1. ํ† ํฌ๋‚˜์ด์ €๋ฅผ ์ฝ๊ณ  ๋‹ค์‹œ ์ €์žฅํ•  ๋กœ์ปฌ ๋ชจ๋ธ ๊ฒฝ๋กœ
39
+ # # ์ด ๊ฒฝ๋กœ์— tokenizer.json ํŒŒ์ผ์ด ์ด๋ฏธ ์กด์žฌํ•ด์•ผ ํ•ฉ๋‹ˆ๋‹ค.
40
+ # model_and_tokenizer_path = "./lily_llm_core/models/polyglot_ko_1_3b_chat"
41
+
42
+ # print(f"๋กœ์ปฌ ๊ฒฝ๋กœ์—์„œ ํ† ํฌ๋‚˜์ด์ €๋ฅผ ์ฝ์–ด์˜ต๋‹ˆ๋‹ค: '{model_and_tokenizer_path}'")
43
+
44
+ # try:
45
+ # # ์ธํ„ฐ๋„ท ์—ฐ๊ฒฐ ์—†์ด ๋กœ์ปฌ ํŒŒ์ผ๋งŒ์œผ๋กœ ํ† ํฌ๋‚˜์ด์ € ๋กœ๋“œ
46
+ # tokenizer = AutoTokenizer.from_pretrained(
47
+ # model_and_tokenizer_path,
48
+ # local_files_only=True # ์ด ์˜ต์…˜์ด ํ•ต์‹ฌ์ž…๋‹ˆ๋‹ค!
49
+ # )
50
+ # print("โœ… ๋กœ์ปฌ ํ† ํฌ๋‚˜์ด์ € ๋กœ๋“œ ์„ฑ๊ณต!")
51
+
52
+ # # ํ˜„์žฌ ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ ๋ฒ„์ „์— ๋งž๋Š” ํ˜•์‹์œผ๋กœ ๋™์ผํ•œ ๊ฒฝ๋กœ์— ๋‹ค์‹œ ์ €์žฅ (๋ฎ์–ด์“ฐ๊ธฐ)
53
+ # tokenizer.save_pretrained(model_and_tokenizer_path)
54
+ # print(f"โœ… ํ† ํฌ๋‚˜์ด์ €๋ฅผ '{model_and_tokenizer_path}' ๊ฒฝ๋กœ์— ์ƒˆ ํ˜•์‹์œผ๋กœ ๋‹ค์‹œ ์ €์žฅํ–ˆ์Šต๋‹ˆ๋‹ค.")
55
+ # print("\n์ด์ œ Lily LLM ์„œ๋ฒ„๋ฅผ ๋‹ค์‹œ ์‹คํ–‰ํ•ด ์ฃผ์„ธ์š”.")
56
+
57
+ # except Exception as e:
58
+ # print(f"\nโŒ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {e}")
59
+ # print(f"'{model_and_tokenizer_path}' ๊ฒฝ๋กœ์— ํ† ํฌ๋‚˜์ด์ € ํŒŒ์ผ๋“ค์ด ์˜ฌ๋ฐ”๋ฅด๊ฒŒ ์กด์žฌํ•˜๋Š”์ง€ ํ™•์ธํ•ด ์ฃผ์„ธ์š”.")
60
+
refresh_tokenizer_kanana.py ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from transformers import AutoTokenizer
import os

# Hugging Face Hub id of the model whose tokenizer is re-downloaded.
# model_name = "EleutherAI/polyglot-ko-1.3b"
# model_name = "EleutherAI/polyglot-ko-5.8b"
model_name = "kakaocorp/kanana-1.5-v-3b-instruct"

# Local directory whose tokenizer files will be overwritten.
# save_directory = "./lily_llm_core/models/polyglot_ko_1_3b_chat"
# save_directory = "./lily_llm_core/models/polyglot_ko_5_8b_chat"
save_directory = "./lily_llm_core/models/kanana_1_5_v_3b_instruct"

print(f"'{model_name}' ๋ชจ๋ธ์˜ ํ† ํฌ๋‚˜์ด์ €๋ฅผ ๋‹ค์šด๋กœ๋“œํ•ฉ๋‹ˆ๋‹ค...")

try:
    # Download the tokenizer from the Hub (uses cached credentials if any).
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    print("โœ… ํ† ํฌ๋‚˜์ด์ € ๋‹ค์šด๋กœ๋“œ ์„ฑ๊ณต!")

    # FIX: exist_ok=True removes the check-then-create race; the creation
    # message is still only printed when the directory was actually missing.
    if not os.path.exists(save_directory):
        os.makedirs(save_directory, exist_ok=True)
        print(f"'{save_directory}' ํด๋”๋ฅผ ์ƒ์„ฑํ–ˆ์Šต๋‹ˆ๋‹ค.")

    # Re-save in the current library's format, overwriting stale files.
    tokenizer.save_pretrained(save_directory)
    print(f"โœ… ํ† ํฌ๋‚˜์ด์ €๋ฅผ '{save_directory}' ๊ฒฝ๋กœ์— ์„ฑ๊ณต์ ์œผ๋กœ ์ €์žฅํ–ˆ์Šต๋‹ˆ๋‹ค.")
    print("\n์ด์ œ Lily LLM ์„œ๋ฒ„๋ฅผ ๋‹ค์‹œ ์‹คํ–‰ํ•ด ์ฃผ์„ธ์š”.")

except Exception as e:
    print(f"\nโŒ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {e}")
    print("๋ชจ๋ธ ์ด๋ฆ„์„ ํ™•์ธํ•˜๊ฑฐ๋‚˜ ์ธํ„ฐ๋„ท ์—ฐ๊ฒฐ ์ƒํƒœ๋ฅผ ์ ๊ฒ€ํ•ด ์ฃผ์„ธ์š”.")
33
+
34
+
35
+ # from transformers import AutoTokenizer
36
+ # import os
37
+
38
+ # # 1. ํ† ํฌ๋‚˜์ด์ €๋ฅผ ์ฝ๊ณ  ๋‹ค์‹œ ์ €์žฅํ•  ๋กœ์ปฌ ๋ชจ๋ธ ๊ฒฝ๋กœ
39
+ # # ์ด ๊ฒฝ๋กœ์— tokenizer.json ํŒŒ์ผ์ด ์ด๋ฏธ ์กด์žฌํ•ด์•ผ ํ•ฉ๋‹ˆ๋‹ค.
40
+ # model_and_tokenizer_path = "./lily_llm_core/models/polyglot_ko_1_3b_chat"
41
+
42
+ # print(f"๋กœ์ปฌ ๊ฒฝ๋กœ์—์„œ ํ† ํฌ๋‚˜์ด์ €๋ฅผ ์ฝ์–ด์˜ต๋‹ˆ๋‹ค: '{model_and_tokenizer_path}'")
43
+
44
+ # try:
45
+ # # ์ธํ„ฐ๋„ท ์—ฐ๊ฒฐ ์—†์ด ๋กœ์ปฌ ํŒŒ์ผ๋งŒ์œผ๋กœ ํ† ํฌ๋‚˜์ด์ € ๋กœ๋“œ
46
+ # tokenizer = AutoTokenizer.from_pretrained(
47
+ # model_and_tokenizer_path,
48
+ # local_files_only=True # ์ด ์˜ต์…˜์ด ํ•ต์‹ฌ์ž…๋‹ˆ๋‹ค!
49
+ # )
50
+ # print("โœ… ๋กœ์ปฌ ํ† ํฌ๋‚˜์ด์ € ๋กœ๋“œ ์„ฑ๊ณต!")
51
+
52
+ # # ํ˜„์žฌ ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ ๋ฒ„์ „์— ๋งž๋Š” ํ˜•์‹์œผ๋กœ ๋™์ผํ•œ ๊ฒฝ๋กœ์— ๋‹ค์‹œ ์ €์žฅ (๋ฎ์–ด์“ฐ๊ธฐ)
53
+ # tokenizer.save_pretrained(model_and_tokenizer_path)
54
+ # print(f"โœ… ํ† ํฌ๋‚˜์ด์ €๋ฅผ '{model_and_tokenizer_path}' ๊ฒฝ๋กœ์— ์ƒˆ ํ˜•์‹์œผ๋กœ ๋‹ค์‹œ ์ €์žฅํ–ˆ์Šต๋‹ˆ๋‹ค.")
55
+ # print("\n์ด์ œ Lily LLM ์„œ๋ฒ„๋ฅผ ๋‹ค์‹œ ์‹คํ–‰ํ•ด ์ฃผ์„ธ์š”.")
56
+
57
+ # except Exception as e:
58
+ # print(f"\nโŒ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {e}")
59
+ # print(f"'{model_and_tokenizer_path}' ๊ฒฝ๋กœ์— ํ† ํฌ๋‚˜์ด์ € ํŒŒ์ผ๋“ค์ด ์˜ฌ๋ฐ”๋ฅด๊ฒŒ ์กด์žฌํ•˜๋Š”์ง€ ํ™•์ธํ•ด ์ฃผ์„ธ์š”.")
60
+
61
+
62
+
63
+
64
+
65
+
66
+
67
+ # from PIL import Image
68
+ # import torch
69
+ # from transformers import AutoModelForVision2Seq, AutoProcessor
70
+
71
+ # MODEL = "kakaocorp/kanana-1.5-v-3b-instruct"
72
+
73
+ # # Load the model on the available device(s)
74
+ # model = AutoModelForVision2Seq.from_pretrained(
75
+ # MODEL,
76
+ # torch_dtype=torch.bfloat16,
77
+ # device_map="auto",
78
+ # trust_remote_code=True
79
+ # )
80
+ # model.eval()
81
+
82
+ # # Load processor
83
+ # processor = AutoProcessor.from_pretrained(MODEL, trust_remote_code=True)
84
+
85
+ # # Prepare input batch
86
+ # batch = []
87
+ # for _ in range(1): # dummy loop to demonstrate batch processing
88
+ # image_files = [
89
+ # "./examples/waybill.png"
90
+ # ]
91
+
92
+ # sample = {
93
+ # "image": [Image.open(image_file_path).convert("RGB") for image_file_path in image_files],
94
+ # "conv": [
95
+ # {"role": "system", "content": "The following is a conversation between a curious human and AI assistant."},
96
+ # {"role": "user", "content": " ".join(["<image>"] * len(image_files))},
97
+ # {"role": "user", "content": "์‚ฌ์ง„์—์„œ ๋ณด๋‚ด๋Š” ์‚ฌ๋žŒ๊ณผ ๋ฐ›๋Š” ์‚ฌ๋žŒ ์ •๋ณด๋ฅผ json ํ˜•ํƒœ๋กœ ์ •๋ฆฌํ•ด์ค˜."},
98
+ # ]
99
+ # }
100
+
101
+ # batch.append(sample)
102
+
103
+ # inputs = processor.batch_encode_collate(
104
+ # batch, padding_side="left", add_generation_prompt=True, max_length=8192
105
+ # )
106
+ # inputs = {k: v.to(model.device) if isinstance(v, torch.Tensor) else v for k, v in inputs.items()}
107
+
108
+ # # Set the generation config
109
+ # gen_kwargs = {
110
+ # "max_new_tokens": 2048,
111
+ # "temperature": 0,
112
+ # "top_p": 1.0,
113
+ # "num_beams": 1,
114
+ # "do_sample": False,
115
+ # }
116
+
117
+ # # Generate text
118
+ # gens = model.generate(
119
+ # **inputs,
120
+ # **gen_kwargs,
121
+ # )
122
+ # text_outputs = processor.tokenizer.batch_decode(gens, skip_special_tokens=True)
123
+ # print(text_outputs) # ['```json\n{\n "๋ณด๋‚ด๋Š”๋ถ„": {\n "์„ฑ๋ช…": "์นด์นด์˜ค",\n "์ฃผ์†Œ": "๊ฒฝ๊ธฐ๋„ ์„ฑ๋‚จ์‹œ ํŒ๊ต์—ญ๋กœ 166"\n },\n "๋ฐ›๋Š”๋ถ„": {\n "์„ฑ๋ช…": "์นด๋‚˜๋‚˜",\n "์ฃผ์†Œ": "์ œ์ฃผ๋„ ์ œ์ฃผ์‹œ ์ฒจ๋‹จ๋กœ 242"\n }\n}\n```']
124
+
refresh_tokenizer_polyglot.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from transformers import AutoTokenizer
import os

# Hugging Face Hub id of the model whose tokenizer is re-downloaded.
model_name = "EleutherAI/polyglot-ko-1.3b"
# model_name = "EleutherAI/polyglot-ko-5.8b"
# model_name = "kakaocorp/kanana-1.5-v-3b-instruct"

# Local directory whose tokenizer files will be overwritten.
save_directory = "./lily_llm_core/models/polyglot_ko_1_3b_chat"
# save_directory = "./lily_llm_core/models/polyglot_ko_5_8b_chat"
# save_directory = "./lily_llm_core/models/kanana_1_5_v_3b_instruct"

print(f"'{model_name}' ๋ชจ๋ธ์˜ ํ† ํฌ๋‚˜์ด์ €๋ฅผ ๋‹ค์šด๋กœ๋“œํ•ฉ๋‹ˆ๋‹ค...")

try:
    # Download the tokenizer from the Hub (uses cached credentials if any).
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    print("โœ… ํ† ํฌ๋‚˜์ด์ € ๋‹ค์šด๋กœ๋“œ ์„ฑ๊ณต!")

    # FIX: exist_ok=True removes the check-then-create race; the creation
    # message is still only printed when the directory was actually missing.
    if not os.path.exists(save_directory):
        os.makedirs(save_directory, exist_ok=True)
        print(f"'{save_directory}' ํด๋”๋ฅผ ์ƒ์„ฑํ–ˆ์Šต๋‹ˆ๋‹ค.")

    # Re-save in the current library's format, overwriting stale files.
    tokenizer.save_pretrained(save_directory)
    print(f"โœ… ํ† ํฌ๋‚˜์ด์ €๋ฅผ '{save_directory}' ๊ฒฝ๋กœ์— ์„ฑ๊ณต์ ์œผ๋กœ ์ €์žฅํ–ˆ์Šต๋‹ˆ๋‹ค.")
    print("\n์ด์ œ Lily LLM ์„œ๋ฒ„๋ฅผ ๋‹ค์‹œ ์‹คํ–‰ํ•ด ์ฃผ์„ธ์š”.")

except Exception as e:
    print(f"\nโŒ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {e}")
    print("๋ชจ๋ธ ์ด๋ฆ„์„ ํ™•์ธํ•˜๊ฑฐ๋‚˜ ์ธํ„ฐ๋„ท ์—ฐ๊ฒฐ ์ƒํƒœ๋ฅผ ์ ๊ฒ€ํ•ด ์ฃผ์„ธ์š”.")
33
+
34
+
35
+ # from transformers import AutoTokenizer
36
+ # import os
37
+
38
+ # # 1. ํ† ํฌ๋‚˜์ด์ €๋ฅผ ์ฝ๊ณ  ๋‹ค์‹œ ์ €์žฅํ•  ๋กœ์ปฌ ๋ชจ๋ธ ๊ฒฝ๋กœ
39
+ # # ์ด ๊ฒฝ๋กœ์— tokenizer.json ํŒŒ์ผ์ด ์ด๋ฏธ ์กด์žฌํ•ด์•ผ ํ•ฉ๋‹ˆ๋‹ค.
40
+ # model_and_tokenizer_path = "./lily_llm_core/models/polyglot_ko_1_3b_chat"
41
+
42
+ # print(f"๋กœ์ปฌ ๊ฒฝ๋กœ์—์„œ ํ† ํฌ๋‚˜์ด์ €๋ฅผ ์ฝ์–ด์˜ต๋‹ˆ๋‹ค: '{model_and_tokenizer_path}'")
43
+
44
+ # try:
45
+ # # ์ธํ„ฐ๋„ท ์—ฐ๊ฒฐ ์—†์ด ๋กœ์ปฌ ํŒŒ์ผ๋งŒ์œผ๋กœ ํ† ํฌ๋‚˜์ด์ € ๋กœ๋“œ
46
+ # tokenizer = AutoTokenizer.from_pretrained(
47
+ # model_and_tokenizer_path,
48
+ # local_files_only=True # ์ด ์˜ต์…˜์ด ํ•ต์‹ฌ์ž…๋‹ˆ๋‹ค!
49
+ # )
50
+ # print("โœ… ๋กœ์ปฌ ํ† ํฌ๋‚˜์ด์ € ๋กœ๋“œ ์„ฑ๊ณต!")
51
+
52
+ # # ํ˜„์žฌ ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ ๋ฒ„์ „์— ๋งž๋Š” ํ˜•์‹์œผ๋กœ ๋™์ผํ•œ ๊ฒฝ๋กœ์— ๋‹ค์‹œ ์ €์žฅ (๋ฎ์–ด์“ฐ๊ธฐ)
53
+ # tokenizer.save_pretrained(model_and_tokenizer_path)
54
+ # print(f"โœ… ํ† ํฌ๋‚˜์ด์ €๋ฅผ '{model_and_tokenizer_path}' ๊ฒฝ๋กœ์— ์ƒˆ ํ˜•์‹์œผ๋กœ ๋‹ค์‹œ ์ €์žฅํ–ˆ์Šต๋‹ˆ๋‹ค.")
55
+ # print("\n์ด์ œ Lily LLM ์„œ๋ฒ„๋ฅผ ๋‹ค์‹œ ์‹คํ–‰ํ•ด ์ฃผ์„ธ์š”.")
56
+
57
+ # except Exception as e:
58
+ # print(f"\nโŒ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {e}")
59
+ # print(f"'{model_and_tokenizer_path}' ๊ฒฝ๋กœ์— ํ† ํฌ๋‚˜์ด์ € ํŒŒ์ผ๋“ค์ด ์˜ฌ๋ฐ”๋ฅด๊ฒŒ ์กด์žฌํ•˜๋Š”์ง€ ํ™•์ธํ•ด ์ฃผ์„ธ์š”.")
60
+
requirements.txt CHANGED
@@ -1,73 +1,53 @@
1
- # Core FastAPI
2
- fastapi>=0.104.0
3
- uvicorn[standard]>=0.24.0
4
- python-multipart>=0.0.6
5
- pydantic>=2.0.0
6
- pydantic-settings>=2.0.0
7
-
8
- # Machine Learning
9
- torch>=2.0.0
10
- transformers>=4.35.0
11
- accelerate>=0.24.0
12
- tokenizers>=0.15.0
13
- safetensors>=0.4.0
14
-
15
- # Image Processing
16
- Pillow>=10.0.0
17
- opencv-python>=4.8.0
18
-
19
- # Vector Database
20
- faiss-cpu>=1.7.4
21
- sentence-transformers>=2.2.2
22
-
23
- # LangChain
24
- langchain>=0.1.0
25
- langchain-community>=0.0.10
26
-
27
- # Text Processing
28
- nltk>=3.8.1
29
-
30
- # HTTP Requests
31
- requests>=2.31.0
32
- aiohttp>=3.9.0
33
-
34
- # Utilities
35
- python-dotenv>=1.0.0
36
- numpy>=1.24.0
37
- pandas>=2.1.0
38
-
39
- # PDF Processing
40
- PyMuPDF>=1.23.0
41
-
42
- # Document Processing
43
- python-docx>=1.1.0
44
- python-pptx>=1.0.0
45
-
46
- # OCR
47
- easyocr>=1.7.0
48
- pytesseract>=0.3.10
49
-
50
- # Database
51
- sqlalchemy>=2.0.0
52
-
53
- # Task Queue
54
- celery>=5.3.0
55
- redis>=5.0.0
56
-
57
- # Security
58
- python-jose[cryptography]>=3.3.0
59
- passlib[bcrypt]>=1.7.4
60
-
61
- # WebSocket
62
- websockets>=12.0
63
-
64
- # Performance Monitoring
65
- psutil>=5.9.0
66
-
67
- # Logging
68
- python-json-logger>=3.0.0
69
-
70
- # additional
71
- PyJWT==2.8.0
72
- einops==0.8.1
73
- timm==1.0.19
 
1
+ # requirements.txt (Final Version for Kanana)
2
+
3
+ # Core ML/DL Stack (Pinned for Kanana Model Compatibility)
4
+ numpy==1.26.4
5
+ torch==2.3.1
6
+ torchvision==0.18.1
7
+ # transformers==4.29.2
8
+ # tokenizers==0.13.3
9
+ transformers
10
+ tokenizers
11
+ peft==0.10.0
12
+ accelerate==0.30.1
13
+ bitsandbytes
14
+ safetensors
15
+
16
+ # OCR and Image Processing
17
+ easyocr
18
+ opencv-python-headless
19
+
20
+ # Web Framework & Parsers
21
+ fastapi
22
+ uvicorn[standard]
23
+ markdown-it-py==3.0.0
24
+
25
+ # Other dependencies
26
+ python-multipart
27
+ pydantic
28
+ pydantic-settings
29
+ Pillow
30
+ faiss-cpu
31
+ sentence-transformers
32
+ langchain
33
+ langchain-community
34
+ nltk
35
+ requests
36
+ aiohttp
37
+ python-dotenv
38
+ pandas
39
+ PyMuPDF
40
+ python-docx
41
+ python-pptx
42
+ pytesseract
43
+ sqlalchemy
44
+ celery
45
+ redis
46
+ python-jose[cryptography]
47
+ passlib[bcrypt]
48
+ websockets
49
+ psutil
50
+ python-json-logger
51
+ PyJWT
52
+ einops
53
+ timm
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
requirements_250819_0958txt ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Core FastAPI
2
+ fastapi>=0.104.0
3
+ uvicorn[standard]>=0.24.0
4
+ python-multipart>=0.0.6
5
+ pydantic>=2.0.0
6
+ pydantic-settings>=2.0.0
7
+
8
+ # Machine Learning
9
+ torch>=2.0.0
10
+ transformers>=4.41.2
11
+ accelerate>=0.30.1
12
+ tokenizers>=0.15.0
13
+ safetensors>=0.4.0
14
+ trl>=0.8.6
15
+
16
+ # LoRA/QLoRA Support
17
+ peft>=0.10.0
18
+ bitsandbytes>=0.41.0
19
+
20
+ # Image Processing
21
+ Pillow>=10.0.0
22
+ opencv-python>=4.8.0
23
+
24
+ # Vector Database
25
+ faiss-cpu>=1.7.4
26
+ sentence-transformers>=2.2.2
27
+
28
+ # LangChain
29
+ langchain>=0.1.0
30
+ langchain-community>=0.0.10
31
+
32
+ # Text Processing
33
+ nltk>=3.8.1
34
+
35
+ # HTTP Requests
36
+ requests>=2.31.0
37
+ aiohttp>=3.9.0
38
+
39
+ # Utilities
40
+ python-dotenv>=1.0.0
41
+ numpy>=1.24.0
42
+ pandas>=2.1.0
43
+
44
+ # PDF Processing
45
+ PyMuPDF>=1.23.0
46
+
47
+ # Document Processing
48
+ python-docx>=1.1.0
49
+ python-pptx>=1.0.0
50
+
51
+ # OCR
52
+ easyocr>=1.7.0
53
+ pytesseract>=0.3.10
54
+
55
+ # Database
56
+ sqlalchemy>=2.0.0
57
+
58
+ # Task Queue
59
+ celery>=5.3.0
60
+ redis>=5.0.0
61
+
62
+ # Security
63
+ python-jose[cryptography]>=3.3.0
64
+ passlib[bcrypt]>=1.7.4
65
+
66
+ # WebSocket
67
+ websockets>=12.0
68
+
69
+ # Performance Monitoring
70
+ psutil>=5.9.0
71
+
72
+ # Logging
73
+ python-json-logger>=3.0.0
74
+
75
+ # additional
76
+ PyJWT==2.8.0
77
+ einops==0.8.1
78
+ timm==1.0.19
requirements_250819_1025.txt ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # requirements.txt
2
+
3
+ # Core ML/DL Stack (Pinned for Stability)
4
+ numpy==1.26.4
5
+ torch==2.3.1
6
+ torchvision==0.18.1
7
+ transformers==4.41.2
8
+ peft==0.10.0
9
+ trl==0.8.6
10
+ accelerate==0.30.1
11
+ bitsandbytes
12
+ safetensors
13
+ tokenizers
14
+
15
+ # OCR and Image Processing
16
+ easyocr
17
+ opencv-python-headless
18
+
19
+ # Web Framework
20
+ fastapi
21
+ uvicorn[standard]
22
+
23
+ # Parsers and Utilities (Pinned to fix conflicts)
24
+ markdown-it-py==3.0.0
25
+ mdit-py-plugins
26
+ myst-parser
27
+
28
+ # Other dependencies from your original file
29
+ python-multipart
30
+ pydantic
31
+ pydantic-settings
32
+ Pillow
33
+ faiss-cpu
34
+ sentence-transformers
35
+ langchain
36
+ langchain-community
37
+ nltk
38
+ requests
39
+ aiohttp
40
+ python-dotenv
41
+ pandas
42
+ PyMuPDF
43
+ python-docx
44
+ python-pptx
45
+ pytesseract
46
+ sqlalchemy
47
+ celery
48
+ redis
49
+ python-jose[cryptography]
50
+ passlib[bcrypt]
51
+ websockets
52
+ psutil
53
+ python-json-logger
54
+ PyJWT
55
+ einops
56
+ timm
requirements_full_250819_0721.txt ADDED
@@ -0,0 +1,316 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ accelerate==1.9.0
2
+ aiofiles==24.1.0
3
+ aiohappyeyeballs==2.6.1
4
+ aiohttp==3.12.14
5
+ aiosignal==1.4.0
6
+ alabaster==1.0.0
7
+ albucore==0.0.24
8
+ albumentations==1.4.3
9
+ amqp==5.3.1
10
+ annotated-types==0.7.0
11
+ anyio==4.9.0
12
+ argon2-cffi==25.1.0
13
+ argon2-cffi-bindings==21.2.0
14
+ arrow==1.3.0
15
+ asgiref==3.9.1
16
+ asttokens==3.0.0
17
+ async-lru==2.0.5
18
+ asyncio==3.4.3
19
+ attrs==25.3.0
20
+ babel==2.17.0
21
+ backoff==2.2.1
22
+ bcrypt==4.3.0
23
+ beautifulsoup4==4.13.4
24
+ billiard==4.2.1
25
+ bitsandbytes==0.46.1
26
+ bleach==6.2.0
27
+ Brotli==1.1.0
28
+ build==1.3.0
29
+ cachetools==5.5.2
30
+ celery==5.5.3
31
+ certifi==2025.8.3
32
+ cffi==1.17.1
33
+ chardet==5.2.0
34
+ charset-normalizer==3.4.2
35
+ chroma-hnswlib==0.7.3
36
+ chromadb==0.4.24
37
+ click==8.2.1
38
+ click-didyoumean==0.3.1
39
+ click-plugins==1.1.1.2
40
+ click-repl==0.3.0
41
+ colorama==0.4.6
42
+ coloredlogs==15.0.1
43
+ comm==0.2.3
44
+ contourpy==1.3.3
45
+ cryptography==45.0.5
46
+ cycler==0.12.1
47
+ dataclasses-json==0.6.7
48
+ datasets==4.0.0
49
+ debugpy==1.8.15
50
+ decorator==5.2.1
51
+ defusedxml==0.7.1
52
+ dill==0.3.8
53
+ distro==1.9.0
54
+ docutils==0.21.2
55
+ dotenv==0.9.9
56
+ durationpy==0.10
57
+ easyocr==1.7.2
58
+ einops==0.8.1
59
+ einx==0.3.0
60
+ emoji==2.14.1
61
+ entmax==1.3
62
+ et_xmlfile==2.0.0
63
+ executing==2.2.0
64
+ faiss-cpu==1.8.0
65
+ fastapi==0.116.1
66
+ fastjsonschema==2.21.1
67
+ ffmpy==0.6.1
68
+ filelock==3.18.0
69
+ filetype==1.2.0
70
+ flatbuffers==25.2.10
71
+ fonttools==4.59.0
72
+ fqdn==1.5.1
73
+ frozendict==2.4.6
74
+ frozenlist==1.7.0
75
+ fsspec==2025.7.0
76
+ git-filter-repo==2.47.0
77
+ google-auth==2.40.3
78
+ googleapis-common-protos==1.70.0
79
+ gradio==5.41.0
80
+ gradio_client==1.11.0
81
+ greenlet==3.2.3
82
+ groovy==0.1.2
83
+ grpcio==1.74.0
84
+ h11==0.16.0
85
+ h2==4.2.0
86
+ hpack==4.1.0
87
+ httpcore==1.0.9
88
+ httptools==0.6.4
89
+ httpx==0.28.1
90
+ httpx-sse==0.4.1
91
+ huggingface-hub==0.34.3
92
+ humanfriendly==10.0
93
+ hyperframe==6.1.0
94
+ idna==3.10
95
+ imageio==2.37.0
96
+ imagesize==1.4.1
97
+ importlib_metadata==8.7.0
98
+ importlib_resources==6.5.2
99
+ ipykernel==6.30.0
100
+ ipython==9.4.0
101
+ ipython_pygments_lexers==1.1.1
102
+ ipywidgets==8.1.7
103
+ isoduration==20.11.0
104
+ jedi==0.19.2
105
+ Jinja2==3.1.6
106
+ joblib==1.5.1
107
+ json5==0.12.0
108
+ jsonpatch==1.33
109
+ jsonpointer==3.0.0
110
+ jsonschema==4.25.0
111
+ jsonschema-specifications==2025.4.1
112
+ jupyter==1.1.1
113
+ jupyter-console==6.6.3
114
+ jupyter-events==0.12.0
115
+ jupyter-lsp==2.2.6
116
+ jupyter_client==8.6.3
117
+ jupyter_core==5.8.1
118
+ jupyter_server==2.16.0
119
+ jupyter_server_terminals==0.5.3
120
+ jupyterlab==4.4.5
121
+ jupyterlab_pygments==0.3.0
122
+ jupyterlab_server==2.27.3
123
+ jupyterlab_widgets==3.0.15
124
+ kiwisolver==1.4.8
125
+ kombu==5.5.4
126
+ kubernetes==33.1.0
127
+ langchain==0.3.27
128
+ langchain-community==0.3.27
129
+ langchain-core==0.3.72
130
+ langchain-huggingface==0.3.1
131
+ langchain-text-splitters==0.3.9
132
+ langdetect==1.0.9
133
+ langsmith==0.4.10
134
+ lark==1.2.2
135
+ lazy_loader==0.4
136
+ loguru==0.7.3
137
+ lxml==6.0.0
138
+ markdown-it-py==3.0.0
139
+ MarkupSafe==3.0.2
140
+ marshmallow==3.26.1
141
+ matplotlib==3.10.3
142
+ matplotlib-inline==0.1.7
143
+ mdit-py-plugins==0.4.2
144
+ mdurl==0.1.2
145
+ mistune==3.1.3
146
+ mmh3==5.2.0
147
+ mpmath==1.3.0
148
+ multidict==6.6.3
149
+ multiprocess==0.70.16
150
+ munch==4.0.0
151
+ mypy_extensions==1.1.0
152
+ myst-parser==4.0.1
153
+ nbclient==0.10.2
154
+ nbconvert==7.16.6
155
+ nbformat==5.10.4
156
+ nest-asyncio==1.6.0
157
+ networkx==3.5
158
+ ninja==1.11.1.4
159
+ nltk==3.8.1
160
+ notebook==7.4.4
161
+ notebook_shim==0.2.4
162
+ numpy==2.2.6
163
+ oauthlib==3.3.1
164
+ onnxruntime==1.22.1
165
+ opencv-python-headless==4.12.0.88
166
+ openpyxl==3.1.5
167
+ opentelemetry-api==1.36.0
168
+ opentelemetry-exporter-otlp-proto-common==1.36.0
169
+ opentelemetry-exporter-otlp-proto-grpc==1.36.0
170
+ opentelemetry-instrumentation==0.57b0
171
+ opentelemetry-instrumentation-asgi==0.57b0
172
+ opentelemetry-instrumentation-fastapi==0.57b0
173
+ opentelemetry-proto==1.36.0
174
+ opentelemetry-sdk==1.36.0
175
+ opentelemetry-semantic-conventions==0.57b0
176
+ opentelemetry-util-http==0.57b0
177
+ orjson==3.11.1
178
+ overrides==7.7.0
179
+ packaging==25.0
180
+ pandas==2.3.1
181
+ pandocfilters==1.5.1
182
+ parso==0.8.4
183
+ peft==0.8.0
184
+ pillow==11.3.0
185
+ platformdirs==4.3.8
186
+ portalocker==3.2.0
187
+ posthog==6.3.3
188
+ prometheus_client==0.22.1
189
+ prompt_toolkit==3.0.51
190
+ propcache==0.3.2
191
+ protobuf==6.31.1
192
+ psutil==7.0.0
193
+ pulsar-client==3.8.0
194
+ pure_eval==0.2.3
195
+ pyarrow==21.0.0
196
+ pyasn1==0.6.1
197
+ pyasn1_modules==0.4.2
198
+ pyclipper==1.3.0.post6
199
+ pycparser==2.22
200
+ pydantic==2.11.7
201
+ pydantic-settings==2.10.1
202
+ pydantic_core==2.33.2
203
+ pydub==0.25.1
204
+ Pygments==2.19.2
205
+ PyJWT==2.8.0
206
+ PyMuPDF==1.23.8
207
+ PyMuPDFb==1.23.7
208
+ pyparsing==3.2.3
209
+ pypdf==4.1.0
210
+ PyPika==0.48.9
211
+ pyproject_hooks==1.2.0
212
+ pyreadline3==3.5.4
213
+ pytesseract==0.3.13
214
+ python-bidi==0.6.6
215
+ python-dateutil==2.9.0.post0
216
+ python-docx==1.1.2
217
+ python-dotenv==1.1.1
218
+ python-iso639==2025.2.18
219
+ python-json-logger==3.3.0
220
+ python-magic==0.4.27
221
+ python-multipart==0.0.20
222
+ python-pptx==1.0.2
223
+ pytz==2025.2
224
+ pywin32==311
225
+ pywinpty==2.0.15
226
+ PyYAML==6.0.2
227
+ pyzmq==27.0.0
228
+ qdrant-client==1.15.1
229
+ RapidFuzz==3.13.0
230
+ redis==6.2.0
231
+ referencing==0.36.2
232
+ regex==2025.7.34
233
+ requests==2.32.4
234
+ requests-oauthlib==2.0.0
235
+ requests-toolbelt==1.0.0
236
+ rfc3339-validator==0.1.4
237
+ rfc3986-validator==0.1.1
238
+ rfc3987-syntax==1.1.0
239
+ rich==14.1.0
240
+ roman-numerals-py==3.1.0
241
+ rpds-py==0.26.0
242
+ rsa==4.9.1
243
+ ruff==0.12.7
244
+ safehttpx==0.1.6
245
+ safetensors==0.5.3
246
+ scikit-image==0.25.2
247
+ scikit-learn==1.7.1
248
+ scipy==1.16.1
249
+ seaborn==0.13.2
250
+ semantic-version==2.10.0
251
+ Send2Trash==1.8.3
252
+ sentence-transformers==3.0.0
253
+ sentencepiece==0.2.0
254
+ shapely==2.1.1
255
+ shellingham==1.5.4
256
+ simsimd==6.5.0
257
+ six==1.17.0
258
+ sniffio==1.3.1
259
+ snowballstemmer==3.0.1
260
+ soupsieve==2.7
261
+ Sphinx==8.2.3
262
+ sphinxcontrib-applehelp==2.0.0
263
+ sphinxcontrib-devhelp==2.0.0
264
+ sphinxcontrib-htmlhelp==2.1.0
265
+ sphinxcontrib-jsmath==1.0.1
266
+ sphinxcontrib-qthelp==2.0.0
267
+ sphinxcontrib-serializinghtml==2.0.0
268
+ SQLAlchemy==2.0.42
269
+ stack-data==0.6.3
270
+ starlette==0.47.2
271
+ stringzilla==3.12.5
272
+ sympy==1.14.0
273
+ tabulate==0.9.0
274
+ tenacity==8.5.0
275
+ terminado==0.18.1
276
+ threadpoolctl==3.6.0
277
+ tifffile==2025.6.11
278
+ timm==1.0.19
279
+ tinycss2==1.4.0
280
+ tokenizers==0.21.4
281
+ tomlkit==0.13.3
282
+ torch==2.7.1
283
+ torchvision==0.22.1
284
+ tornado==6.5.1
285
+ tqdm==4.67.1
286
+ traitlets==5.14.3
287
+ transformers==4.54.1
288
+ transformers_modules==0.4.0
289
+ trl==0.19.1
290
+ typer==0.16.0
291
+ types-python-dateutil==2.9.0.20250708
292
+ typing-inspect==0.9.0
293
+ typing-inspection==0.4.1
294
+ typing_extensions==4.14.1
295
+ tzdata==2025.2
296
+ unstructured==0.12.4
297
+ unstructured-client==0.42.0
298
+ uri-template==1.3.0
299
+ urllib3==2.5.0
300
+ uvicorn==0.35.0
301
+ vine==5.1.0
302
+ watchfiles==1.1.0
303
+ wcwidth==0.2.13
304
+ webcolors==24.11.1
305
+ webencodings==0.5.1
306
+ websocket-client==1.8.0
307
+ websockets==15.0.1
308
+ widgetsnbextension==4.0.14
309
+ win32_setctime==1.2.0
310
+ wrapt==1.17.2
311
+ x-transformers==0.15.0
312
+ xlsxwriter==3.2.5
313
+ xxhash==3.5.0
314
+ yarl==1.20.1
315
+ zipp==3.23.0
316
+ zstandard==0.23.0
requirements_full_250819_2030.txt ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ accelerate==0.30.1
2
+ aiohappyeyeballs==2.6.1
3
+ aiohttp==3.12.15
4
+ aiosignal==1.4.0
5
+ amqp==5.3.1
6
+ annotated-types==0.7.0
7
+ anyio==4.10.0
8
+ attrs==25.3.0
9
+ bcrypt==4.3.0
10
+ billiard==4.2.1
11
+ bitsandbytes==0.47.0
12
+ celery==5.5.3
13
+ certifi==2025.8.3
14
+ cffi==1.17.1
15
+ charset-normalizer==3.4.3
16
+ click==8.2.1
17
+ click-didyoumean==0.3.1
18
+ click-plugins==1.1.1.2
19
+ click-repl==0.3.0
20
+ colorama==0.4.6
21
+ cryptography==45.0.6
22
+ dataclasses-json==0.6.7
23
+ easyocr==1.7.2
24
+ ecdsa==0.19.1
25
+ einops==0.8.1
26
+ faiss-cpu==1.12.0
27
+ fastapi==0.116.1
28
+ filelock==3.19.1
29
+ frozenlist==1.7.0
30
+ fsspec==2025.7.0
31
+ greenlet==3.2.4
32
+ h11==0.16.0
33
+ httpcore==1.0.9
34
+ httptools==0.6.4
35
+ httpx==0.28.1
36
+ httpx-sse==0.4.1
37
+ huggingface-hub==0.34.4
38
+ idna==3.10
39
+ imageio==2.37.0
40
+ intel-openmp==2021.4.0
41
+ Jinja2==3.1.6
42
+ joblib==1.5.1
43
+ jsonpatch==1.33
44
+ jsonpointer==3.0.0
45
+ kombu==5.5.4
46
+ langchain==0.3.27
47
+ langchain-community==0.3.27
48
+ langchain-core==0.3.74
49
+ langchain-text-splitters==0.3.9
50
+ langsmith==0.4.14
51
+ lazy_loader==0.4
52
+ lxml==6.0.0
53
+ markdown-it-py==3.0.0
54
+ MarkupSafe==3.0.2
55
+ marshmallow==3.26.1
56
+ mdurl==0.1.2
57
+ mkl==2021.4.0
58
+ mpmath==1.3.0
59
+ multidict==6.6.4
60
+ mypy_extensions==1.1.0
61
+ networkx==3.5
62
+ ninja==1.13.0
63
+ nltk==3.9.1
64
+ numpy==1.26.4
65
+ opencv-python-headless==4.11.0.86
66
+ orjson==3.11.2
67
+ packaging==25.0
68
+ pandas==2.3.1
69
+ passlib==1.7.4
70
+ peft==0.10.0
71
+ pillow==11.3.0
72
+ prompt_toolkit==3.0.51
73
+ propcache==0.3.2
74
+ psutil==7.0.0
75
+ pyasn1==0.6.1
76
+ pyclipper==1.3.0.post6
77
+ pycparser==2.22
78
+ pydantic==2.11.7
79
+ pydantic-settings==2.10.1
80
+ pydantic_core==2.33.2
81
+ PyJWT==2.10.1
82
+ PyMuPDF==1.26.3
83
+ pytesseract==0.3.13
84
+ python-bidi==0.6.6
85
+ python-dateutil==2.9.0.post0
86
+ python-docx==1.2.0
87
+ python-dotenv==1.1.1
88
+ python-jose==3.5.0
89
+ python-json-logger==3.3.0
90
+ python-multipart==0.0.20
91
+ python-pptx==1.0.2
92
+ pytz==2025.2
93
+ PyYAML==6.0.2
94
+ redis==6.4.0
95
+ regex==2025.7.34
96
+ requests==2.32.5
97
+ requests-toolbelt==1.0.0
98
+ rsa==4.9.1
99
+ safetensors==0.6.2
100
+ scikit-image==0.25.2
101
+ scikit-learn==1.7.1
102
+ scipy==1.16.1
103
+ sentence-transformers==2.2.2
104
+ sentencepiece==0.2.1
105
+ shapely==2.1.1
106
+ six==1.17.0
107
+ sniffio==1.3.1
108
+ SQLAlchemy==2.0.43
109
+ starlette==0.47.2
110
+ sympy==1.14.0
111
+ tbb==2021.13.1
112
+ tenacity==9.1.2
113
+ threadpoolctl==3.6.0
114
+ tifffile==2025.6.11
115
+ timm==1.0.19
116
+ tokenizers==0.21.4
117
+ torch==2.3.1
118
+ torchvision==0.18.1
119
+ tqdm==4.67.1
120
+ transformers==4.55.2
121
+ typing-inspect==0.9.0
122
+ typing-inspection==0.4.1
123
+ typing_extensions==4.14.1
124
+ tzdata==2025.2
125
+ urllib3==2.5.0
126
+ uvicorn==0.35.0
127
+ vine==5.1.0
128
+ watchfiles==1.1.0
129
+ wcwidth==0.2.13
130
+ websockets==15.0.1
131
+ xlsxwriter==3.2.5
132
+ yarl==1.20.1
133
+ zstandard==0.24.0
test_context_lora.py ADDED
@@ -0,0 +1,228 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ ์ปจํ…์ŠคํŠธ ๊ด€๋ฆฌ ๋ฐ LoRA ๊ธฐ๋Šฅ ํ…Œ์ŠคํŠธ ์Šคํฌ๋ฆฝํŠธ
4
+ """
5
+
6
+ import requests
7
+ import json
8
+ import time
9
+ from typing import Dict, Any
10
+
11
+ class LilyLLMTester:
12
+ """Lily LLM API ํ…Œ์ŠคํŠธ ํด๋ž˜์Šค"""
13
+
14
+ def __init__(self, base_url: str = "http://localhost:8001"):
15
+ self.base_url = base_url
16
+ self.session = requests.Session()
17
+
18
+ def test_context_management(self):
19
+ """์ปจํ…์ŠคํŠธ ๊ด€๋ฆฌ ๊ธฐ๋Šฅ ํ…Œ์ŠคํŠธ"""
20
+ print("๐Ÿ”ง ์ปจํ…์ŠคํŠธ ๊ด€๋ฆฌ ๊ธฐ๋Šฅ ํ…Œ์ŠคํŠธ ์‹œ์ž‘")
21
+ print("=" * 50)
22
+
23
+ # 1. ์‹œ์Šคํ…œ ํ”„๋กฌํ”„ํŠธ ์„ค์ •
24
+ print("1. ์‹œ์Šคํ…œ ํ”„๋กฌํ”„ํŠธ ์„ค์ •...")
25
+ response = self.session.post(
26
+ f"{self.base_url}/context/set-system-prompt",
27
+ data={"prompt": "๋‹น์‹ ์€ ํ•œ๊ตญ์–ด๋กœ ๋Œ€ํ™”ํ•˜๋Š” AI ์–ด์‹œ์Šคํ„ดํŠธ์ž…๋‹ˆ๋‹ค. ๋ชจ๋“  ์‘๋‹ต์€ ํ•œ๊ตญ์–ด๋กœ ํ•ด์ฃผ์„ธ์š”."}
28
+ )
29
+ print(f" ๊ฒฐ๊ณผ: {response.json()}")
30
+
31
+ # 2. ์‚ฌ์šฉ์ž ๋ฉ”์‹œ์ง€ ์ถ”๊ฐ€
32
+ print("\n2. ์‚ฌ์šฉ์ž ๋ฉ”์‹œ์ง€ ์ถ”๊ฐ€...")
33
+ response = self.session.post(
34
+ f"{self.base_url}/context/add-message",
35
+ data={
36
+ "role": "user",
37
+ "content": "์•ˆ๋…•ํ•˜์„ธ์š”! ์˜ค๋Š˜ ๋‚ ์”จ๊ฐ€ ์–ด๋•Œ์š”?",
38
+ "metadata": json.dumps({"session_id": "test_session_1"})
39
+ }
40
+ )
41
+ print(f" ๊ฒฐ๊ณผ: {response.json()}")
42
+
43
+ # 3. ์–ด์‹œ์Šคํ„ดํŠธ ์‘๋‹ต ์ถ”๊ฐ€
44
+ print("\n3. ์–ด์‹œ์Šคํ„ดํŠธ ์‘๋‹ต ์ถ”๊ฐ€...")
45
+ response = self.session.post(
46
+ f"{self.base_url}/context/add-message",
47
+ data={
48
+ "role": "assistant",
49
+ "content": "์•ˆ๋…•ํ•˜์„ธ์š”! ์˜ค๋Š˜์€ ๋ง‘๊ณ  ํ™”์ฐฝํ•œ ๋‚ ์”จ์ž…๋‹ˆ๋‹ค. ๊ธฐ์˜จ์€ 20๋„ ์ •๋„๋กœ ์พŒ์ ํ•ฉ๋‹ˆ๋‹ค.",
50
+ "metadata": json.dumps({"session_id": "test_session_1"})
51
+ }
52
+ )
53
+ print(f" ๊ฒฐ๊ณผ: {response.json()}")
54
+
55
+ # 4. ์ปจํ…์ŠคํŠธ ์กฐํšŒ
56
+ print("\n4. ์ปจํ…์ŠคํŠธ ์กฐํšŒ...")
57
+ response = self.session.get(f"{self.base_url}/context/get")
58
+ print(f" ๊ฒฐ๊ณผ: {response.json()}")
59
+
60
+ # 5. ์ปจํ…์ŠคํŠธ ์š”์•ฝ
61
+ print("\n5. ์ปจํ…์ŠคํŠธ ์š”์•ฝ...")
62
+ response = self.session.get(f"{self.base_url}/context/summary")
63
+ print(f" ๊ฒฐ๊ณผ: {response.json()}")
64
+
65
+ # 6. ์ปจํ…์ŠคํŠธ ๊ฒ€์ƒ‰
66
+ print("\n6. ์ปจํ…์ŠคํŠธ ๊ฒ€์ƒ‰...")
67
+ response = self.session.get(f"{self.base_url}/context/search?query=๋‚ ์”จ")
68
+ print(f" ๊ฒฐ๊ณผ: {response.json()}")
69
+
70
+ print("\nโœ… ์ปจํ…์ŠคํŠธ ๊ด€๋ฆฌ ๊ธฐ๋Šฅ ํ…Œ์ŠคํŠธ ์™„๋ฃŒ")
71
+
72
+ def test_lora_management(self):
73
+ """LoRA ๊ด€๋ฆฌ ๊ธฐ๋Šฅ ํ…Œ์ŠคํŠธ"""
74
+ print("\n๐Ÿ”ง LoRA ๊ด€๋ฆฌ ๊ธฐ๋Šฅ ํ…Œ์ŠคํŠธ ์‹œ์ž‘")
75
+ print("=" * 50)
76
+
77
+ # 1. ์‚ฌ์šฉ ๊ฐ€๋Šฅํ•œ ์–ด๋Œ‘ํ„ฐ ๋ชฉ๋ก
78
+ print("1. ์‚ฌ์šฉ ๊ฐ€๋Šฅํ•œ LoRA ์–ด๋Œ‘ํ„ฐ ๋ชฉ๋ก...")
79
+ response = self.session.get(f"{self.base_url}/lora/adapters")
80
+ print(f" ๊ฒฐ๊ณผ: {response.json()}")
81
+
82
+ # 2. LoRA ํ†ต๊ณ„ (์–ด๋Œ‘ํ„ฐ๊ฐ€ ๋กœ๋“œ๋˜์ง€ ์•Š์€ ๊ฒฝ์šฐ)
83
+ print("\n2. LoRA ํ†ต๊ณ„...")
84
+ response = self.session.get(f"{self.base_url}/lora/stats")
85
+ print(f" ๊ฒฐ๊ณผ: {response.json()}")
86
+
87
+ # 3. LoRA ์„ค์ • ์ƒ์„ฑ
88
+ print("\n3. LoRA ์„ค์ • ์ƒ์„ฑ...")
89
+ response = self.session.post(
90
+ f"{self.base_url}/lora/create-config",
91
+ data={
92
+ "r": 16,
93
+ "lora_alpha": 32,
94
+ "target_modules": "q_proj,v_proj,k_proj,o_proj",
95
+ "lora_dropout": 0.1,
96
+ "bias": "none",
97
+ "task_type": "CAUSAL_LM"
98
+ }
99
+ )
100
+ print(f" ๊ฒฐ๊ณผ: {response.json()}")
101
+
102
+ print("\nโœ… LoRA ๊ด€๋ฆฌ ๊ธฐ๋Šฅ ํ…Œ์ŠคํŠธ ์™„๋ฃŒ")
103
+
104
+ def test_integrated_generation(self):
105
+ """ํ†ตํ•ฉ ์ƒ์„ฑ ๊ธฐ๋Šฅ ํ…Œ์ŠคํŠธ (์ปจํ…์ŠคํŠธ ํฌํ•จ)"""
106
+ print("\n๐Ÿ”ง ํ†ตํ•ฉ ์ƒ์„ฑ ๊ธฐ๋Šฅ ํ…Œ์ŠคํŠธ ์‹œ์ž‘")
107
+ print("=" * 50)
108
+
109
+ # 1. ์ปจํ…์ŠคํŠธ๋ฅผ ์‚ฌ์šฉํ•œ ํ…์ŠคํŠธ ์ƒ์„ฑ
110
+ print("1. ์ปจํ…์ŠคํŠธ๋ฅผ ์‚ฌ์šฉํ•œ ํ…์ŠคํŠธ ์ƒ์„ฑ...")
111
+ response = self.session.post(
112
+ f"{self.base_url}/generate",
113
+ data={
114
+ "prompt": "๊ทธ๋Ÿผ ๋‚ด์ผ์€ ์–ด๋–ค ๋‚ ์”จ๊ฐ€ ๋ ๊นŒ์š”?",
115
+ "use_context": "true",
116
+ "session_id": "test_session_1"
117
+ }
118
+ )
119
+ print(f" ๊ฒฐ๊ณผ: {response.json()}")
120
+
121
+ # 2. ์ปจํ…์ŠคํŠธ ์—†์ด ํ…์ŠคํŠธ ์ƒ์„ฑ
122
+ print("\n2. ์ปจํ…์ŠคํŠธ ์—†์ด ํ…์ŠคํŠธ ์ƒ์„ฑ...")
123
+ response = self.session.post(
124
+ f"{self.base_url}/generate",
125
+ data={
126
+ "prompt": "๊ฐ„๋‹จํ•œ ์ธ์‚ฌ๋ง์„ ํ•ด์ฃผ์„ธ์š”.",
127
+ "use_context": "false"
128
+ }
129
+ )
130
+ print(f" ๊ฒฐ๊ณผ: {response.json()}")
131
+
132
+ print("\nโœ… ํ†ตํ•ฉ ์ƒ์„ฑ ๊ธฐ๋Šฅ ํ…Œ์ŠคํŠธ ์™„๋ฃŒ")
133
+
134
+ def test_context_export_import(self):
135
+ """์ปจํ…์ŠคํŠธ ๋‚ด๋ณด๋‚ด๊ธฐ/๊ฐ€์ ธ์˜ค๊ธฐ ํ…Œ์ŠคํŠธ"""
136
+ print("\n๐Ÿ”ง ์ปจํ…์ŠคํŠธ ๋‚ด๋ณด๋‚ด๊ธฐ/๊ฐ€์ ธ์˜ค๊ธฐ ํ…Œ์ŠคํŠธ ์‹œ์ž‘")
137
+ print("=" * 50)
138
+
139
+ # 1. ์ปจํ…์ŠคํŠธ ๋‚ด๋ณด๋‚ด๊ธฐ
140
+ print("1. ์ปจํ…์ŠคํŠธ ๋‚ด๋ณด๋‚ด๊ธฐ...")
141
+ response = self.session.post(
142
+ f"{self.base_url}/context/export",
143
+ data={"file_path": "test_context_export.json"}
144
+ )
145
+ print(f" ๊ฒฐ๊ณผ: {response.json()}")
146
+
147
+ # 2. ์ปจํ…์ŠคํŠธ ์ดˆ๊ธฐํ™”
148
+ print("\n2. ์ปจํ…์ŠคํŠธ ์ดˆ๊ธฐํ™”...")
149
+ response = self.session.post(f"{self.base_url}/context/clear")
150
+ print(f" ๊ฒฐ๊ณผ: {response.json()}")
151
+
152
+ # 3. ์ปจํ…์ŠคํŠธ ๊ฐ€์ ธ์˜ค๊ธฐ
153
+ print("\n3. ์ปจํ…์ŠคํŠธ ๊ฐ€์ ธ์˜ค๊ธฐ...")
154
+ response = self.session.post(
155
+ f"{self.base_url}/context/import",
156
+ data={"file_path": "test_context_export.json"}
157
+ )
158
+ print(f" ๊ฒฐ๊ณผ: {response.json()}")
159
+
160
+ print("\nโœ… ์ปจํ…์ŠคํŠธ ๋‚ด๋ณด๋‚ด๊ธฐ/๊ฐ€์ ธ์˜ค๊ธฐ ํ…Œ์ŠคํŠธ ์™„๋ฃŒ")
161
+
162
+ def test_rag_functionality(self):
163
+ """RAG ๊ธฐ๋Šฅ ํ…Œ์ŠคํŠธ"""
164
+ print("\n๐Ÿ”ง RAG ๊ธฐ๋Šฅ ํ…Œ์ŠคํŠธ ์‹œ์ž‘")
165
+ print("=" * 50)
166
+
167
+ # 1. ํ—ฌ์Šค ์ฒดํฌ
168
+ print("1. ์„œ๋ฒ„ ์ƒํƒœ ํ™•์ธ...")
169
+ response = self.session.get(f"{self.base_url}/health")
170
+ print(f" ๊ฒฐ๊ณผ: {response.json()}")
171
+
172
+ # 2. ์‚ฌ์šฉ ๊ฐ€๋Šฅํ•œ ๋ชจ๋ธ ๋ชฉ๋ก
173
+ print("\n2. ์‚ฌ์šฉ ๊ฐ€๋Šฅํ•œ ๋ชจ๋ธ ๋ชฉ๋ก...")
174
+ response = self.session.get(f"{self.base_url}/models")
175
+ print(f" ๊ฒฐ๊ณผ: {response.json()}")
176
+
177
+ print("\nโœ… RAG ๊ธฐ๋Šฅ ํ…Œ์ŠคํŠธ ์™„๋ฃŒ")
178
+
179
+ def run_all_tests(self):
180
+ """๋ชจ๋“  ํ…Œ์ŠคํŠธ ์‹คํ–‰"""
181
+ print("๐Ÿš€ Lily LLM ํ†ตํ•ฉ ํ…Œ์ŠคํŠธ ์‹œ์ž‘")
182
+ print("=" * 60)
183
+
184
+ try:
185
+ # ๊ธฐ๋ณธ ๊ธฐ๋Šฅ ํ…Œ์ŠคํŠธ
186
+ self.test_rag_functionality()
187
+
188
+ # ์ปจํ…์ŠคํŠธ ๊ด€๋ฆฌ ํ…Œ์ŠคํŠธ
189
+ self.test_context_management()
190
+
191
+ # LoRA ๊ด€๋ฆฌ ํ…Œ์ŠคํŠธ
192
+ self.test_lora_management()
193
+
194
+ # ํ†ตํ•ฉ ์ƒ์„ฑ ํ…Œ์ŠคํŠธ
195
+ self.test_integrated_generation()
196
+
197
+ # ์ปจํ…์ŠคํŠธ ๋‚ด๋ณด๋‚ด๊ธฐ/๊ฐ€์ ธ์˜ค๊ธฐ ํ…Œ์ŠคํŠธ
198
+ self.test_context_export_import()
199
+
200
+ print("\n๐ŸŽ‰ ๋ชจ๋“  ํ…Œ์ŠคํŠธ๊ฐ€ ์„ฑ๊ณต์ ์œผ๋กœ ์™„๋ฃŒ๋˜์—ˆ์Šต๋‹ˆ๋‹ค!")
201
+
202
+ except Exception as e:
203
+ print(f"\nโŒ ํ…Œ์ŠคํŠธ ์‹คํ–‰ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {e}")
204
+ import traceback
205
+ traceback.print_exc()
206
+
207
+ def main():
208
+ """๋ฉ”์ธ ํ•จ์ˆ˜"""
209
+ print("Lily LLM API ํ…Œ์ŠคํŠธ ๋„๊ตฌ")
210
+ print("=" * 40)
211
+
212
+ # ์„œ๋ฒ„ URL ์ž…๋ ฅ
213
+ base_url = input("์„œ๋ฒ„ URL์„ ์ž…๋ ฅํ•˜์„ธ์š” (๊ธฐ๋ณธ๊ฐ’: http://localhost:8001): ").strip()
214
+ if not base_url:
215
+ base_url = "http://localhost:8001"
216
+
217
+ # ํ…Œ์Šคํ„ฐ ์ƒ์„ฑ ๋ฐ ์‹คํ–‰
218
+ tester = LilyLLMTester(base_url)
219
+
220
+ try:
221
+ tester.run_all_tests()
222
+ except KeyboardInterrupt:
223
+ print("\n\nโน๏ธ ํ…Œ์ŠคํŠธ๊ฐ€ ์‚ฌ์šฉ์ž์— ์˜ํ•ด ์ค‘๋‹จ๋˜์—ˆ์Šต๋‹ˆ๋‹ค.")
224
+ except Exception as e:
225
+ print(f"\n\nโŒ ์˜ˆ์ƒ์น˜ ๋ชปํ•œ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {e}")
226
+
227
+ if __name__ == "__main__":
228
+ main()