gbrabbit committed on
Commit
ca781ff
·
1 Parent(s): eddb502

Auto commit at 2025-08-09 20:34:30

Browse files
lily_llm_api/app_v2.py CHANGED
@@ -176,7 +176,7 @@ def select_model_interactive():
176
  try:
177
  # choice = input(f"\nπŸ“ μ‚¬μš©ν•  λͺ¨λΈ 번호λ₯Ό μ„ νƒν•˜μ„Έμš” (1-{len(available_models)}): ")
178
  # selected_model = available_models[int(choice) - 1]
179
- selected_model = available_models[1]
180
  print(f"\nβœ… '{selected_model['name']}' λͺ¨λΈμ„ μ„ νƒν–ˆμŠ΅λ‹ˆλ‹€.")
181
  return selected_model['model_id']
182
  except (ValueError, IndexError):
@@ -292,26 +292,55 @@ def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_lengt
292
  for key in all_image_metas[0].keys():
293
  combined_image_metas[key] = [meta[key] for meta in all_image_metas]
294
 
295
- # --- 2. ν”„λ‘¬ν”„νŠΈ ꡬ성 (이미지 μœ λ¬΄μ— 관계없이 곡톡 μ‹€ν–‰) ---
296
  image_tokens = "<image>" * len(all_pixel_values)
297
- # ν…μŠ€νŠΈμ™€ λ©€ν‹°λͺ¨λ‹¬ λͺ¨λ‘ λ™μΌν•œ ν”„λ‘¬ν”„νŠΈ ν…œν”Œλ¦Ώ μ‚¬μš©
298
  formatted_prompt = f"<|im_start|>user\n{image_tokens}{prompt}<|im_end|>\n<|im_start|>assistant\n"
299
 
300
- # --- 3. ν† ν¬λ‚˜μ΄μ§• (곡톡 μ‹€ν–‰) ---
301
- # ν…μŠ€νŠΈμ™€ λ©€ν‹°λͺ¨λ‹¬ λͺ¨λ‘ λ™μΌν•œ μ»€μŠ€ν…€ ν† ν¬λ‚˜μ΄μ € ν•¨μˆ˜ μ‚¬μš©
302
- inputs = tokenizer.encode_prompt(prompt=formatted_prompt, image_meta=combined_image_metas)
303
-
304
- input_ids = inputs['input_ids'].unsqueeze(0).to(model.device)
305
- attention_mask = inputs['attention_mask'].unsqueeze(0).to(model.device)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
306
 
307
  # --- 4. λͺ¨λΈ 생성 (곡톡 μ‹€ν–‰) ---
308
  gen_config = current_profile.get_generation_config()
 
 
 
309
 
310
  # max_length λ“± μ‚¬μš©μž μ§€μ • νŒŒλΌλ―Έν„°κ°€ 있으면 gen_config에 반영
311
  if max_length is not None: gen_config['max_new_tokens'] = max_length
312
  if temperature is not None: gen_config['temperature'] = temperature
313
  if top_p is not None: gen_config['top_p'] = top_p
314
  if do_sample is not None: gen_config['do_sample'] = do_sample
 
 
 
 
 
315
 
316
  with torch.no_grad():
317
  if image_processed:
@@ -364,8 +393,17 @@ async def generate(prompt: str = Form(...),
364
  image_data = await img.read()
365
  image_data_list.append(image_data)
366
 
367
- result = await loop.run_in_executor(executor, generate_sync, prompt, image_data_list,
368
- max_length, temperature, top_p, do_sample)
 
 
 
 
 
 
 
 
 
369
 
370
  processing_time = time.time() - start_time
371
  logger.info(f"βœ… 생성 μ™„λ£Œ ({processing_time:.2f}초), 이미지 처리: {result['image_processed']}")
 
176
  try:
177
  # choice = input(f"\nπŸ“ μ‚¬μš©ν•  λͺ¨λΈ 번호λ₯Ό μ„ νƒν•˜μ„Έμš” (1-{len(available_models)}): ")
178
  # selected_model = available_models[int(choice) - 1]
179
+ selected_model = available_models[0]
180
  print(f"\nβœ… '{selected_model['name']}' λͺ¨λΈμ„ μ„ νƒν–ˆμŠ΅λ‹ˆλ‹€.")
181
  return selected_model['model_id']
182
  except (ValueError, IndexError):
 
292
  for key in all_image_metas[0].keys():
293
  combined_image_metas[key] = [meta[key] for meta in all_image_metas]
294
 
295
+ # --- 2. ν”„λ‘¬ν”„νŠΈ ꡬ성 ---
296
  image_tokens = "<image>" * len(all_pixel_values)
297
+ # Kanana κΈ°λ³Έ 포맷. ν…μŠ€νŠΈ-only λͺ¨λΈμ€ profile.format_prompt둜 λŒ€μ²΄λ¨
298
  formatted_prompt = f"<|im_start|>user\n{image_tokens}{prompt}<|im_end|>\n<|im_start|>assistant\n"
299
 
300
+ # --- 3. ν† ν¬λ‚˜μ΄μ§• ---
301
+ if hasattr(tokenizer, 'encode_prompt'):
302
+ inputs = tokenizer.encode_prompt(prompt=formatted_prompt, image_meta=combined_image_metas)
303
+ input_ids = inputs['input_ids']
304
+ attention_mask = inputs['attention_mask']
305
+ else:
306
+ # ν…μŠ€νŠΈ-only λͺ¨λΈμ˜ ꢌμž₯ ν”„λ‘¬ν”„νŠΈ μ‚¬μš©
307
+ if not getattr(current_profile, 'multimodal', False) and hasattr(current_profile, 'format_prompt'):
308
+ formatted_prompt = current_profile.format_prompt(prompt)
309
+ inputs = tokenizer(
310
+ formatted_prompt,
311
+ return_tensors="pt",
312
+ padding=True,
313
+ truncation=True,
314
+ max_length=256,
315
+ )
316
+ if 'token_type_ids' in inputs:
317
+ del inputs['token_type_ids']
318
+ input_ids = inputs['input_ids']
319
+ attention_mask = inputs['attention_mask']
320
+
321
+ if input_ids.dim() == 1:
322
+ input_ids = input_ids.unsqueeze(0)
323
+ if attention_mask.dim() == 1:
324
+ attention_mask = attention_mask.unsqueeze(0)
325
+ input_ids = input_ids.to(model.device)
326
+ attention_mask = attention_mask.to(model.device)
327
 
328
  # --- 4. λͺ¨λΈ 생성 (곡톡 μ‹€ν–‰) ---
329
  gen_config = current_profile.get_generation_config()
330
+ # CPUμ—μ„œ κ³Όλ„ν•œ max_new_tokensλŠ” λŒ€κΈ° μ‹œκ°„μ„ 크게 늘림 β†’ κΈ°λ³Έ μƒν•œμ„ 보수적으둜 μ‘°μ •
331
+ if gen_config.get('max_new_tokens', 256) > 128 and (not torch.cuda.is_available()):
332
+ gen_config['max_new_tokens'] = 128
333
 
334
  # max_length λ“± μ‚¬μš©μž μ§€μ • νŒŒλΌλ―Έν„°κ°€ 있으면 gen_config에 반영
335
  if max_length is not None: gen_config['max_new_tokens'] = max_length
336
  if temperature is not None: gen_config['temperature'] = temperature
337
  if top_p is not None: gen_config['top_p'] = top_p
338
  if do_sample is not None: gen_config['do_sample'] = do_sample
339
+ # pad/eos 보완 (일뢀 ν† ν¬λ‚˜μ΄μ €λŠ” pad_token λ―Έμ •μ˜)
340
+ if gen_config.get('pad_token_id') is None and hasattr(tokenizer, 'pad_token_id'):
341
+ gen_config['pad_token_id'] = tokenizer.pad_token_id or tokenizer.eos_token_id
342
+ if gen_config.get('eos_token_id') is None and hasattr(tokenizer, 'eos_token_id'):
343
+ gen_config['eos_token_id'] = tokenizer.eos_token_id
344
 
345
  with torch.no_grad():
346
  if image_processed:
 
393
  image_data = await img.read()
394
  image_data_list.append(image_data)
395
 
396
+ # 단일 μ‹€ν–‰ 보μž₯: generate_syncλŠ” 였직 ν•œ 번만 호좜
397
+ result = await loop.run_in_executor(
398
+ executor,
399
+ generate_sync,
400
+ prompt,
401
+ image_data_list,
402
+ max_length,
403
+ temperature,
404
+ top_p,
405
+ do_sample,
406
+ )
407
 
408
  processing_time = time.time() - start_time
409
  logger.info(f"βœ… 생성 μ™„λ£Œ ({processing_time:.2f}초), 이미지 처리: {result['image_processed']}")
lily_llm_api/models/__init__.py CHANGED
@@ -15,12 +15,6 @@ from .polyglot_ko_5_8b_chat import PolyglotKo58bChatProfile
15
 
16
  # μ‚¬μš© κ°€λŠ₯ν•œ λͺ¨λΈ ν”„λ‘œν•„λ“€
17
  AVAILABLE_MODELS = {
18
- # "polyglot-ko-1.3b": PolyglotKo13bProfile,
19
- # "dialogpt-medium": DialoGPTMediumProfile,
20
- # "kanana-1.5-2.1b-instruct": Kanana15V21bInstructProfile,
21
- # "kanana-nano-2.1b-instruct": KananaNano21bInstructProfile,
22
- # "mistral-7b-instruct": Mistral7bInstructProfile,
23
- # "polyglot-ko-5.8b": PolyglotKo58bProfile,
24
  "polyglot-ko-1.3b-chat": PolyglotKo13bChatProfile,
25
  "kanana-1.5-v-3b-instruct": Kanana15V3bInstructProfile,
26
  "polyglot-ko-5.8b-chat": PolyglotKo58bChatProfile,
 
15
 
16
  # μ‚¬μš© κ°€λŠ₯ν•œ λͺ¨λΈ ν”„λ‘œν•„λ“€
17
  AVAILABLE_MODELS = {
 
 
 
 
 
 
18
  "polyglot-ko-1.3b-chat": PolyglotKo13bChatProfile,
19
  "kanana-1.5-v-3b-instruct": Kanana15V3bInstructProfile,
20
  "polyglot-ko-5.8b-chat": PolyglotKo58bChatProfile,
lily_llm_api/models/dialogpt_medium.py DELETED
@@ -1,82 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- DialoGPT-medium λͺ¨λΈ ν”„λ‘œν•„
4
- """
5
-
6
- from typing import Dict, Any, Tuple
7
- import torch
8
- from transformers import AutoTokenizer, AutoModelForCausalLM
9
- import logging
10
-
11
- logger = logging.getLogger(__name__)
12
-
13
- class DialoGPTMediumProfile:
14
- """DialoGPT-medium λͺ¨λΈ ν”„λ‘œν•„"""
15
-
16
- def __init__(self):
17
- self.model_name = "microsoft/DialoGPT-medium"
18
- self.local_path = None # μ˜¨λΌμΈμ—μ„œ λ‘œλ“œ
19
- self.display_name = "DialoGPT-medium"
20
- self.description = "μ˜μ–΄ λŒ€ν™”ν˜• λͺ¨λΈ (774M)"
21
- self.language = "en"
22
- self.model_size = "774M"
23
-
24
- def load_model(self) -> Tuple[AutoModelForCausalLM, AutoTokenizer]:
25
- """λͺ¨λΈ λ‘œλ“œ"""
26
- logger.info(f"πŸ“₯ {self.display_name} λͺ¨λΈ λ‘œλ“œ 쀑...")
27
-
28
- try:
29
- # μ˜¨λΌμΈμ—μ„œ λͺ¨λΈ λ‘œλ“œ
30
- tokenizer = AutoTokenizer.from_pretrained(self.model_name)
31
- model = AutoModelForCausalLM.from_pretrained(self.model_name)
32
-
33
- if tokenizer.pad_token is None:
34
- tokenizer.pad_token = tokenizer.eos_token
35
-
36
- logger.info(f"βœ… {self.display_name} λͺ¨λΈ λ‘œλ“œ 성곡!")
37
- return model, tokenizer
38
-
39
- except Exception as e:
40
- logger.error(f"❌ {self.display_name} λͺ¨λΈ λ‘œλ“œ μ‹€νŒ¨: {e}")
41
- raise
42
-
43
- def format_prompt(self, user_input: str) -> str:
44
- """ν”„λ‘¬ν”„νŠΈ ν¬λ§·νŒ…"""
45
- return f"User: {user_input}\nAssistant:"
46
-
47
- def extract_response(self, full_text: str, formatted_prompt: str) -> str:
48
- """응닡 μΆ”μΆœ"""
49
- if "Assistant:" in full_text:
50
- response = full_text.split("Assistant:")[-1].strip()
51
- else:
52
- if formatted_prompt in full_text:
53
- response = full_text.replace(formatted_prompt, "").strip()
54
- else:
55
- response = full_text.strip()
56
-
57
- return response
58
-
59
- def get_generation_config(self) -> Dict[str, Any]:
60
- """생성 μ„€μ •"""
61
- return {
62
- "max_new_tokens": 50,
63
- "temperature": 0.9,
64
- "do_sample": True,
65
- "top_k": 50,
66
- "top_p": 0.95,
67
- "repetition_penalty": 1.1,
68
- "no_repeat_ngram_size": 3,
69
- "pad_token_id": None,
70
- "eos_token_id": None
71
- }
72
-
73
- def get_model_info(self) -> Dict[str, Any]:
74
- """λͺ¨λΈ 정보"""
75
- return {
76
- "model_name": self.model_name,
77
- "display_name": self.display_name,
78
- "description": self.description,
79
- "language": self.language,
80
- "model_size": self.model_size,
81
- "local_path": self.local_path
82
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
lily_llm_api/models/kanana_1_5_2_1b_instruct.py DELETED
@@ -1,93 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- Kanana 1.5 2.1B Instruct λͺ¨λΈ ν”„λ‘œν•„
4
- """
5
-
6
- import torch
7
- from transformers import AutoTokenizer, AutoModelForCausalLM
8
- from typing import Dict, Any, Tuple
9
- import logging
10
-
11
- logger = logging.getLogger(__name__)
12
-
13
- class Kanana15V21bInstructProfile:
14
- """Kanana 1.5 2.1B Instruct λͺ¨λΈ ν”„λ‘œν•„"""
15
-
16
- def __init__(self):
17
- self.model_name = "kakaocorp/kanana-1.5-2.1b-instruct-2505"
18
- self.local_path = "./lily_llm_core/models/kanana-1.5-2.1b-instruct"
19
- self.display_name = "Kanana 1.5 2.1B Instruct"
20
- self.description = "Kakao의 Kanana 1.5 2.1B Instruct λͺ¨λΈ"
21
- self.language = ["ko", "en"]
22
- self.model_size = "2.1B"
23
-
24
- def load_model(self) -> Tuple[AutoModelForCausalLM, AutoTokenizer]:
25
- """λͺ¨λΈ λ‘œλ“œ"""
26
- logger.info(f"πŸ“₯ {self.display_name} λͺ¨λΈ λ‘œλ“œ 쀑...")
27
-
28
- try:
29
- # ν† ν¬λ‚˜μ΄μ € λ‘œλ“œ
30
- tokenizer = AutoTokenizer.from_pretrained(
31
- self.local_path,
32
- trust_remote_code=True,
33
- local_files_only=True
34
- )
35
-
36
- # λͺ¨λΈ λ‘œλ“œ
37
- model = AutoModelForCausalLM.from_pretrained(
38
- self.local_path,
39
- torch_dtype=torch.float32,
40
- device_map="cpu",
41
- low_cpu_mem_usage=True,
42
- local_files_only=True
43
- )
44
-
45
- # ν† ν¬λ‚˜μ΄μ € μ„€μ •
46
- if tokenizer.pad_token is None:
47
- tokenizer.pad_token = tokenizer.eos_token
48
- if tokenizer.eos_token is None:
49
- tokenizer.eos_token = "</s>"
50
-
51
- logger.info(f"βœ… {self.display_name} λͺ¨λΈ λ‘œλ“œ 성곡!")
52
- return model, tokenizer
53
-
54
- except Exception as e:
55
- logger.error(f"❌ {self.display_name} λͺ¨λΈ λ‘œλ“œ μ‹€νŒ¨: {e}")
56
- raise
57
-
58
- def format_prompt(self, user_input: str) -> str:
59
- """ν”„λ‘¬ν”„νŠΈ ν¬λ§·νŒ…"""
60
- return f"<|im_start|>user\n{user_input}<|im_end|>\n<|im_start|>assistant\n"
61
-
62
- def extract_response(self, full_text: str, formatted_prompt: str) -> str:
63
- """응닡 μΆ”μΆœ"""
64
- if "<|im_start|>assistant\n" in full_text:
65
- response = full_text.split("<|im_start|>assistant\n")[-1]
66
- if "<|im_end|>" in response:
67
- response = response.split("<|im_end|>")[0]
68
- return response.strip()
69
- return full_text.strip()
70
-
71
- def get_generation_config(self) -> Dict[str, Any]:
72
- """생성 μ„€μ •"""
73
- return {
74
- "max_new_tokens": 512,
75
- "temperature": 0.7,
76
- "top_p": 0.9,
77
- "do_sample": True,
78
- "repetition_penalty": 1.1,
79
- "no_repeat_ngram_size": 3,
80
- "pad_token_id": None, # ν† ν¬λ‚˜μ΄μ €μ—μ„œ 섀정됨
81
- "eos_token_id": None, # ν† ν¬λ‚˜μ΄μ €μ—μ„œ 섀정됨
82
- }
83
-
84
- def get_model_info(self) -> Dict[str, Any]:
85
- """λͺ¨λΈ 정보"""
86
- return {
87
- "model_name": self.model_name,
88
- "display_name": self.display_name,
89
- "description": self.description,
90
- "language": self.language,
91
- "model_size": self.model_size,
92
- "local_path": self.local_path
93
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
lily_llm_api/models/kanana_1_5_v_3b_instruct.py CHANGED
@@ -156,12 +156,13 @@ class Kanana15V3bInstructProfile:
156
 
157
  if use_local:
158
  # 둜컬 λͺ¨λΈ: μ»€μŠ€ν…€ λͺ¨λΈλ§ 클래슀 μ‚¬μš©
159
- logger.info("πŸ” DEBUG: 둜컬 λͺ¨λΈ λ‘œλ“œ μ‹œλ„")
 
160
  model = KananaVForConditionalGeneration.from_pretrained(
161
  model_path,
162
  token=HF_TOKEN,
163
  trust_remote_code=True,
164
- torch_dtype=torch.bfloat16,
165
  local_files_only=True,
166
  # low_cpu_mem_usage=True,
167
  ).to(DEVICE)
@@ -176,10 +177,12 @@ class Kanana15V3bInstructProfile:
176
  raise
177
 
178
  logger.info("πŸ” DEBUG: KananaVForConditionalGeneration.from_pretrained 호좜")
 
 
179
  model = KananaVForConditionalGeneration.from_pretrained(
180
  model_path,
181
  token=HF_TOKEN,
182
- torch_dtype=torch.float16,
183
  trust_remote_code=True,
184
  cache_dir="/app/cache/transformers",
185
  # device_map="auto",
 
156
 
157
  if use_local:
158
  # 둜컬 λͺ¨λΈ: μ»€μŠ€ν…€ λͺ¨λΈλ§ 클래슀 μ‚¬μš©
159
+ logger.info("πŸ” DEBUG: 둜컬 λͺ¨λΈ λ‘œλ“œ μ‹œλ„")
160
+ selected_dtype = torch.float16 if DEVICE == "cuda" else torch.float32
161
  model = KananaVForConditionalGeneration.from_pretrained(
162
  model_path,
163
  token=HF_TOKEN,
164
  trust_remote_code=True,
165
+ torch_dtype=selected_dtype,
166
  local_files_only=True,
167
  # low_cpu_mem_usage=True,
168
  ).to(DEVICE)
 
177
  raise
178
 
179
  logger.info("πŸ” DEBUG: KananaVForConditionalGeneration.from_pretrained 호좜")
180
+ # CPU ν™˜κ²½μ—μ„œ float16/bfloat16보닀 float32κ°€ 더 μ•ˆμ •μ μΈ κ²½μš°κ°€ 많음
181
+ selected_dtype = torch.float16 if DEVICE == "cuda" else torch.float32
182
  model = KananaVForConditionalGeneration.from_pretrained(
183
  model_path,
184
  token=HF_TOKEN,
185
+ torch_dtype=selected_dtype,
186
  trust_remote_code=True,
187
  cache_dir="/app/cache/transformers",
188
  # device_map="auto",
lily_llm_api/models/kanana_1_5_v_3b_instruct_250809_0055.py DELETED
@@ -1,256 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- Kanana-1.5-v-3b-instruct λͺ¨λΈ ν”„λ‘œν•„ (λ‹¨μˆœ λ‘œλ”© μ΅œμ’…λ³Έ)
4
- """
5
- import sys
6
- from typing import Dict, Any, Tuple
7
- import torch
8
- import logging
9
- from transformers import AutoTokenizer
10
- import os
11
- from dotenv import load_dotenv
12
- load_dotenv()
13
-
14
- HF_TOKEN = os.getenv("HF_TOKEN")
15
-
16
- logger = logging.getLogger(__name__)
17
- DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
18
-
19
- max_new_tokens = 64
20
-
21
- class Kanana15V3bInstructProfile:
22
- """Kanana-1.5-v-3b-instruct λͺ¨λΈ ν”„λ‘œν•„"""
23
-
24
- def __init__(self):
25
- # ν™˜κ²½ 감지
26
- self.is_local = self._detect_local_environment()
27
-
28
- # λͺ¨λΈ 경둜 μ„€μ •
29
- if self.is_local:
30
- self.model_name = "gbrabbit/lily-math-model" # λ‘œμ»¬μ—μ„œλ„ HF λͺ¨λΈλͺ… μ‚¬μš©
31
- self.local_path = "./lily_llm_core/models/kanana_1_5_v_3b_instruct"
32
- self.display_name = "Kanana-1.5-v-3b-instruct (둜컬)"
33
- else:
34
- self.model_name = "gbrabbit/lily-math-model" # Hugging Face Hub λͺ¨λΈ 경둜
35
- self.local_path = None # μ„œλ²„μ—μ„œλŠ” 둜컬 경둜 μ‚¬μš© μ•ˆν•¨
36
- self.display_name = "Kanana-1.5-v-3b-instruct (μ„œλ²„)"
37
-
38
- self.description = "카카였 λ©€ν‹°λͺ¨λ‹¬ λͺ¨λΈ (3.6B) - Math RAG νŠΉν™”"
39
- self.language = "ko"
40
- self.model_size = "3.6B"
41
- self.multimodal = True
42
-
43
- def _detect_local_environment(self) -> bool:
44
- """둜컬 ν™˜κ²½μΈμ§€ 감지"""
45
- import os
46
-
47
- # 둜컬 ν™˜κ²½ 감지 쑰건듀
48
- local_indicators = [
49
- os.path.exists('.env'),
50
- os.path.exists('../.env'),
51
- os.path.exists('../../.env'),
52
- os.getenv('IS_LOCAL') == 'true',
53
- os.getenv('ENVIRONMENT') == 'local',
54
- os.getenv('DOCKER_ENV') == 'local',
55
- # Windows 경둜 확인
56
- os.path.exists('C:/Project/lily_generate_project/lily_generate_package/.env'),
57
- ]
58
-
59
- is_local = any(local_indicators)
60
- logger.info(f"πŸ” ν™˜κ²½ 감지: {'둜컬' if is_local else 'μ„œλ²„'}")
61
- return is_local
62
-
63
- def _load_environment_variables(self):
64
- """ν™˜κ²½λ³€μˆ˜λ₯Ό λ‘œλ“œν•©λ‹ˆλ‹€."""
65
- import os
66
-
67
- try:
68
- if self.is_local:
69
- # 둜컬 ν™˜κ²½: .env 파일 λ‘œλ“œ
70
- from dotenv import load_dotenv
71
-
72
- # μ—¬λŸ¬ κ²½λ‘œμ—μ„œ .env 파일 μ°ΎκΈ°
73
- env_paths = [
74
- '.env',
75
- '../.env',
76
- '../../.env',
77
- 'C:/Project/lily_generate_project/lily_generate_package/.env',
78
- ]
79
-
80
- env_loaded = False
81
- for env_path in env_paths:
82
- if os.path.exists(env_path):
83
- load_dotenv(env_path)
84
- logger.info(f"βœ… ν™˜κ²½λ³€μˆ˜ λ‘œλ“œλ¨: {env_path}")
85
- env_loaded = True
86
- break
87
-
88
- if not env_loaded:
89
- logger.warning("⚠️ .env νŒŒμΌμ„ 찾을 수 μ—†μŠ΅λ‹ˆλ‹€")
90
- else:
91
- # μ„œλ²„ ν™˜κ²½: μ‹œμŠ€ν…œ ν™˜κ²½λ³€μˆ˜ μ‚¬μš©
92
- logger.info("🌐 μ„œλ²„ ν™˜κ²½λ³€μˆ˜ μ‚¬μš©")
93
-
94
- except ImportError:
95
- logger.warning("⚠️ python-dotenvκ°€ μ„€μΉ˜λ˜μ§€ μ•ŠμŒ")
96
- except Exception as e:
97
- logger.error(f"❌ ν™˜κ²½λ³€μˆ˜ λ‘œλ“œ μ‹€νŒ¨: {e}")
98
-
99
- def load_model(self) -> Tuple[Any, Any]:
100
- """ν™˜κ²½μ— 따라 λͺ¨λΈμ„ λ‘œλ“œν•©λ‹ˆλ‹€."""
101
- logger.info(f"πŸ“₯ {self.display_name} λͺ¨λΈ λ‘œλ“œ 쀑...")
102
-
103
- import os
104
- from pathlib import Path
105
-
106
- # ν™˜κ²½λ³€μˆ˜ λ‘œλ”©
107
- self._load_environment_variables()
108
-
109
- try:
110
- # 1. 둜컬 μΊμ‹œ κ²½λ‘œκ°€ μžˆλŠ”μ§€ 확인
111
- use_local = False
112
- if self.local_path is not None:
113
- local_model_path = Path(self.local_path)
114
- use_local = local_model_path.exists() and any(local_model_path.iterdir())
115
-
116
- if use_local:
117
- logger.info(f"πŸ—‚οΈ 둜컬 λͺ¨λΈ μ‚¬μš©: {self.local_path}")
118
- model_path = self.local_path
119
- local_files_only = True
120
-
121
- # 둜컬 λͺ¨λΈμ˜ 경우 sys.path에 μΆ”κ°€
122
- if self.local_path not in sys.path:
123
- sys.path.insert(0, self.local_path)
124
- else:
125
- logger.info(f"🌐 Hugging Face Hubμ—μ„œ λ‹€μš΄λ‘œλ“œ: {self.model_name}")
126
- model_path = self.model_name
127
- local_files_only = False
128
-
129
- # ν™˜κ²½λ³„ μΆ”κ°€ μ„€μ •
130
- if self.is_local:
131
- logger.info("🏠 둜컬 ν™˜κ²½ μ„€μ • 적용")
132
- # 둜컬 ν™˜κ²½μ—μ„œλŠ” μΆ”κ°€ 섀정이 ν•„μš”ν•  수 있음
133
- else:
134
- logger.info("☁️ μ„œλ²„ ν™˜κ²½ μ„€μ • 적용")
135
- # μ„œλ²„ ν™˜κ²½μ—μ„œλŠ” μΊμ‹œ 디렉토리 λ“± μ„€μ •
136
-
137
- # 2. ν† ν¬λ‚˜μ΄μ € λ‘œλ“œ
138
- tokenizer = AutoTokenizer.from_pretrained(
139
- model_path,
140
- token=HF_TOKEN,
141
- trust_remote_code=True,
142
- local_files_only=local_files_only,
143
- cache_dir="/app/cache/transformers" if not use_local else None
144
- )
145
- logger.info(f"βœ… ν† ν¬λ‚˜μ΄μ € λ‘œλ“œ μ™„λ£Œ ({tokenizer.__class__.__name__})")
146
- from modeling import KananaVForConditionalGeneration
147
- # 3. λͺ¨λΈ λ‘œλ“œ
148
- if use_local:
149
- # 둜컬 λͺ¨λΈ: μ»€μŠ€ν…€ λͺ¨λΈλ§ 클래슀 μ‚¬μš©
150
- model = KananaVForConditionalGeneration.from_pretrained(
151
- model_path,
152
- token=HF_TOKEN,
153
- trust_remote_code=True,
154
- torch_dtype=torch.bfloat16,
155
- local_files_only=True,
156
- # low_cpu_mem_usage=True,
157
- ).to(DEVICE)
158
- else:
159
- model = KananaVForConditionalGeneration.from_pretrained(
160
- model_path,
161
- token=HF_TOKEN,
162
- torch_dtype=torch.float16,
163
- trust_remote_code=True,
164
- cache_dir="/app/cache/transformers",
165
- # device_map="auto",
166
- # low_cpu_mem_usage=True,
167
- ).to(DEVICE)
168
-
169
- logger.info(f"βœ… λͺ¨λΈ λ‘œλ“œ μ™„λ£Œ ({model.__class__.__name__})")
170
- return model, tokenizer
171
-
172
- except Exception as e:
173
- logger.error(f"❌ {self.display_name} λͺ¨λΈ λ‘œλ“œ μ‹€νŒ¨: {e}", exc_info=True)
174
- if use_local and self.local_path in sys.path:
175
- sys.path.remove(self.local_path)
176
- raise
177
-
178
- def get_generation_config(self) -> Dict[str, Any]:
179
- # λͺ¨λΈ νŒŒλΌλ―Έν„° μ΅œμ ν™” μ„€μ •, max_new_tokens : μƒμ„±λ˜λŠ” ν…μŠ€νŠΈ 길이 μ΅œλŒ€κ°’ (이미지 μ„€λͺ…을 μœ„ν•΄ 증가)
180
- return {"max_new_tokens": max_new_tokens, "temperature": 0.7, "do_sample": True, "top_k": 40, "top_p": 0.9, "repetition_penalty": 1.1}
181
-
182
- def extract_response(self, full_text: str, formatted_prompt: str = None, **kwargs) -> str:
183
- """
184
- λ‹€μ–‘ν•œ 응닡 ν˜•μ‹μ„ μ²˜λ¦¬ν•  수 μžˆλŠ” 더 λ˜‘λ˜‘ν•œ 응닡 μΆ”μΆœ ν•¨μˆ˜
185
- """
186
- logger.info(f"--- 응닡 μΆ”μΆœ μ‹œμž‘ ---")
187
- logger.info(f"전체 생성 ν…μŠ€νŠΈ (Raw): \n---\n{full_text}\n---")
188
-
189
- # ν”„λ‘¬ν”„νŠΈκ°€ 제곡된 경우 이λ₯Ό 제거
190
- if formatted_prompt and formatted_prompt in full_text:
191
- response = full_text.replace(formatted_prompt, "").strip()
192
- logger.info(f"βœ… 성곡: ν”„λ‘¬ν”„νŠΈ 제거둜 응닡 μΆ”μΆœ")
193
- logger.info(f"μΆ”μΆœλœ 응닡: {response}")
194
- if response: # 빈 λ¬Έμžμ—΄μ΄ μ•„λ‹Œ κ²½μš°μ—λ§Œ λ°˜ν™˜
195
- return response
196
-
197
- # 1μˆœμœ„: κ°€μž₯ μ •ν™•ν•œ 특수 νƒœκ·Έλ‘œ μΆ”μΆœ μ‹œλ„
198
- # 예: <|start_header_id|>assistant<|end_header_id|>μ•ˆλ…•ν•˜μ„Έμš”...
199
- # λ˜λŠ” <|im_start|>assistantμ•ˆλ…•ν•˜μ„Έμš”...
200
- assistant_tags = [
201
- "<|start_header_id|>assistant<|end_header_id|>",
202
- "<|im_start|>assistant",
203
- "assistant\n",
204
- "assistant:"
205
- ]
206
- for tag in assistant_tags:
207
- if tag in full_text:
208
- parts = full_text.split(tag)
209
- if len(parts) > 1:
210
- response = parts[-1].strip()
211
- # μΆ”κ°€ 정리: 특수 토큰 제거
212
- response = response.replace("<|im_end|>", "").strip()
213
- logger.info(f"βœ… 성곡: '{tag}' νƒœκ·Έλ‘œ 응닡 μΆ”μΆœ")
214
- logger.info(f"μΆ”μΆœλœ 응닡: {response}")
215
- if response: # 빈 λ¬Έμžμ—΄μ΄ μ•„λ‹Œ κ²½μš°μ—λ§Œ λ°˜ν™˜
216
- return response
217
-
218
- # 2μˆœμœ„: κ°„λ‹¨ν•œ ν‚€μ›Œλ“œλ‘œ μΆ”μΆœ μ‹œλ„
219
- # 예: ... user μ•ˆλ…•ν•˜μ„Έμš” assistant μ•ˆλ…•ν•˜μ„Έμš” ...
220
- if "assistant" in full_text:
221
- parts = full_text.split("assistant")
222
- if len(parts) > 1:
223
- response = parts[-1].strip()
224
- response = response.replace("<|im_end|>", "").strip()
225
- logger.info("βœ… 성곡: 'assistant' ν‚€μ›Œλ“œλ‘œ 응닡 μΆ”μΆœ")
226
- logger.info(f"μΆ”μΆœλœ 응닡: {response}")
227
- if response: # 빈 λ¬Έμžμ—΄μ΄ μ•„λ‹Œ κ²½μš°μ—λ§Œ λ°˜ν™˜
228
- return response
229
-
230
- # 3μˆœμœ„: ν”„λ‘¬ν”„νŠΈκ°€ μ—†λŠ” 경우, 전체 ν…μŠ€νŠΈμ—μ„œ λΆˆν•„μš”ν•œ λΆ€λΆ„ 제거
231
- clean_text = full_text.strip()
232
- # 일반적인 ν”„λ‘¬ν”„νŠΈ νŒ¨ν„΄ 제거 μ‹œλ„
233
- patterns_to_remove = [
234
- "<|im_start|>user\n",
235
- "<|im_end|>",
236
- "<image>",
237
- "user\n",
238
- "assistant\n"
239
- ]
240
-
241
- for pattern in patterns_to_remove:
242
- clean_text = clean_text.replace(pattern, "")
243
-
244
- clean_text = clean_text.strip()
245
-
246
- if clean_text and clean_text != full_text:
247
- logger.info("βœ… 성곡: νŒ¨ν„΄ 제거둜 응닡 정리")
248
- logger.info(f"μ •λ¦¬λœ 응닡: {clean_text}")
249
- return clean_text
250
-
251
- logger.warning("⚠️ κ²½κ³ : μ‘λ‹΅μ—μ„œ assistant 뢀뢄을 μ°Ύμ§€ λͺ»ν–ˆμŠ΅λ‹ˆλ‹€. 전체 ν…μŠ€νŠΈλ₯Ό λ°˜ν™˜ν•©λ‹ˆλ‹€.")
252
- logger.info(f"μ΅œμ’… λ°˜ν™˜ ν…μŠ€νŠΈ: {full_text}")
253
- return full_text
254
-
255
- def get_model_info(self) -> Dict[str, Any]:
256
- return {"model_name": self.model_name, "display_name": self.display_name, "description": self.description, "language": self.language, "model_size": self.model_size, "local_path": self.local_path, "multimodal": self.multimodal}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
lily_llm_api/models/kanana_nano_2_1b_instruct.py DELETED
@@ -1,95 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- Kanana Nano 2.1B Instruct λͺ¨λΈ ν”„λ‘œν•„
4
- """
5
-
6
- import torch
7
- from transformers import AutoTokenizer, AutoModelForCausalLM
8
- from typing import Dict, Any, Tuple
9
- import logging
10
-
11
- logger = logging.getLogger(__name__)
12
-
13
- class KananaNano21bInstructProfile:
14
- """Kanana Nano 2.1B Instruct λͺ¨λΈ ν”„λ‘œν•„"""
15
-
16
- def __init__(self):
17
- self.model_name = "kakaocorp/kanana-nano-2.1b-instruct"
18
- self.local_path = "./lily_llm_core/models/kanana-nano-2.1b-instruct"
19
- self.display_name = "Kanana Nano 2.1B Instruct"
20
- self.description = "Kakao의 Kanana Nano 2.1B Instruct λͺ¨λΈ (κ°€μž₯ μž‘μ€ λͺ¨λΈ)"
21
- self.language = ["ko", "en"]
22
- self.model_size = "2.1B"
23
-
24
- def load_model(self) -> Tuple[AutoModelForCausalLM, AutoTokenizer]:
25
- """λͺ¨λΈ λ‘œλ“œ"""
26
- logger.info(f"πŸ“₯ {self.display_name} λͺ¨λΈ λ‘œλ“œ 쀑...")
27
-
28
- try:
29
- # ν† ν¬λ‚˜μ΄μ € λ‘œλ“œ
30
- tokenizer = AutoTokenizer.from_pretrained(
31
- self.local_path,
32
- trust_remote_code=True,
33
- local_files_only=True
34
- )
35
-
36
- # λͺ¨λΈ λ‘œλ“œ
37
- model = AutoModelForCausalLM.from_pretrained(
38
- self.local_path,
39
- torch_dtype=torch.float32,
40
- device_map="cpu",
41
- low_cpu_mem_usage=True,
42
- local_files_only=True
43
- )
44
-
45
- # ν† ν¬λ‚˜μ΄μ € μ„€μ •
46
- if tokenizer.pad_token is None:
47
- tokenizer.pad_token = tokenizer.eos_token
48
- if tokenizer.eos_token is None:
49
- tokenizer.eos_token = "</s>"
50
-
51
- logger.info(f"βœ… {self.display_name} λͺ¨λΈ λ‘œλ“œ 성곡!")
52
- return model, tokenizer
53
-
54
- except Exception as e:
55
- logger.error(f"❌ {self.display_name} λͺ¨λΈ λ‘œλ“œ μ‹€νŒ¨: {e}")
56
- raise
57
-
58
- def format_prompt(self, user_input: str) -> str:
59
- """ν”„λ‘¬ν”„νŠΈ ν¬λ§·νŒ…"""
60
- return f"<|im_start|>user\n{user_input}<|im_end|>\n<|im_start|>assistant\n"
61
-
62
- def extract_response(self, full_text: str, formatted_prompt: str) -> str:
63
- """응닡 μΆ”μΆœ"""
64
- if "<|im_start|>assistant\n" in full_text:
65
- response = full_text.split("<|im_start|>assistant\n")[-1]
66
- if "<|im_end|>" in response:
67
- response = response.split("<|im_end|>")[0]
68
- return response.strip()
69
- return full_text.strip()
70
-
71
- def get_generation_config(self) -> Dict[str, Any]:
72
- """생성 μ„€μ •"""
73
- return {
74
- "max_new_tokens": 128, # 512μ—μ„œ 128둜 μ€„μž„
75
- "temperature": 0.7,
76
- "top_p": 0.9,
77
- "do_sample": True,
78
- "repetition_penalty": 1.1,
79
- "no_repeat_ngram_size": 3,
80
- "pad_token_id": None, # ν† ν¬λ‚˜μ΄μ €μ—μ„œ 섀정됨
81
- "eos_token_id": None, # ν† ν¬λ‚˜μ΄μ €μ—μ„œ 섀정됨
82
- "use_cache": True, # μΊμ‹œ μ‚¬μš©
83
- "return_dict_in_generate": False, # λ©”λͺ¨λ¦¬ μ ˆμ•½
84
- }
85
-
86
- def get_model_info(self) -> Dict[str, Any]:
87
- """λͺ¨λΈ 정보"""
88
- return {
89
- "model_name": self.model_name,
90
- "display_name": self.display_name,
91
- "description": self.description,
92
- "language": self.language,
93
- "model_size": self.model_size,
94
- "local_path": self.local_path
95
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
lily_llm_api/models/mistral_7b_instruct.py DELETED
@@ -1,103 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- Mistral-7B-Instruct-v0.2 λͺ¨λΈ ν”„λ‘œν•„
4
- mistralai/Mistral-7B-Instruct-v0.2 λͺ¨λΈμš©
5
- """
6
-
7
- from typing import Dict, Any, Tuple
8
- import torch
9
- from transformers import AutoTokenizer, AutoModelForCausalLM
10
- import logging
11
-
12
- logger = logging.getLogger(__name__)
13
-
14
- class Mistral7bInstructProfile:
15
- """Mistral-7B-Instruct-v0.2 λͺ¨λΈ ν”„λ‘œν•„"""
16
-
17
- def __init__(self):
18
- self.model_name = "mistralai/Mistral-7B-Instruct-v0.2"
19
- self.local_path = "./lily_llm_core/models/mistral-7B-Instruct-v0.2"
20
- self.display_name = "Mistral-7B-Instruct-v0.2"
21
- self.description = "Mistral AI의 7B νŒŒλΌλ―Έν„° 인슀트럭트 λͺ¨λΈ"
22
- self.language = "en"
23
- self.model_size = "7B"
24
-
25
- def load_model(self) -> Tuple[AutoModelForCausalLM, AutoTokenizer]:
26
- """λͺ¨λΈ λ‘œλ“œ"""
27
- logger.info(f"πŸ“₯ {self.display_name} λͺ¨λΈ λ‘œλ“œ 쀑...")
28
-
29
- try:
30
- # 둜컬 λͺ¨λΈ λ‘œλ“œ
31
- tokenizer = AutoTokenizer.from_pretrained(self.local_path, use_fast=True)
32
-
33
- if tokenizer.pad_token is None:
34
- tokenizer.pad_token = tokenizer.eos_token
35
-
36
- model = AutoModelForCausalLM.from_pretrained(
37
- self.local_path,
38
- trust_remote_code=True,
39
- local_files_only=True,
40
- torch_dtype=torch.bfloat16,
41
- # device_map="cpu",
42
- # low_cpu_mem_usage=True
43
- # max_memory={"cpu": "8GB"}
44
- )
45
-
46
- # λͺ¨λΈμ„ CPU둜 λͺ…μ‹œμ  이동
47
- model.to('cpu')
48
-
49
- logger.info(f"βœ… {self.display_name} λͺ¨λΈ λ‘œλ“œ 성곡!")
50
- return model, tokenizer
51
-
52
- except Exception as e:
53
- logger.error(f"❌ {self.display_name} λͺ¨λΈ λ‘œλ“œ μ‹€νŒ¨: {e}")
54
- raise
55
-
56
- def format_prompt(self, user_input: str) -> str:
57
- """ν”„λ‘¬ν”„νŠΈ ν¬λ§·νŒ… - Mistral 인슀트럭트 ν˜•μ‹"""
58
- # Mistral-7B-Instruct-v0.2 λͺ¨λΈμ˜ ꢌμž₯ ν”„λ‘¬ν”„νŠΈ ν˜•μ‹
59
- prompt = f"""<s>[INST] {user_input} [/INST]"""
60
- return prompt
61
-
62
- def extract_response(self, full_text: str, formatted_prompt: str) -> str:
63
- """응닡 μΆ”μΆœ"""
64
- # Mistral λͺ¨λΈμ˜ 응닡 μΆ”μΆœ
65
- if "[/INST]" in full_text:
66
- response = full_text.split("[/INST]")[-1].strip()
67
- else:
68
- # ν”„λ‘¬ν”„νŠΈ 제거
69
- if formatted_prompt in full_text:
70
- response = full_text.replace(formatted_prompt, "").strip()
71
- else:
72
- response = full_text.strip()
73
-
74
- # 빈 μ‘λ‹΅μ΄λ‚˜ μ΄μƒν•œ 문자만 μžˆλŠ” 경우 처리
75
- if not response or len(response.strip()) < 2:
76
- return "Hello! How can I help you today?"
77
-
78
- return response
79
-
80
- def get_generation_config(self) -> Dict[str, Any]:
81
- """생성 μ„€μ •"""
82
- return {
83
- "max_new_tokens": 128,
84
- "temperature": 0.7,
85
- "do_sample": True,
86
- "top_k": 50,
87
- "top_p": 0.9,
88
- "repetition_penalty": 1.1,
89
- "no_repeat_ngram_size": 3,
90
- "pad_token_id": None, # λͺ¨λΈμ—μ„œ μžλ™ μ„€μ •
91
- "eos_token_id": None # λͺ¨λΈμ—μ„œ μžλ™ μ„€μ •
92
- }
93
-
94
- def get_model_info(self) -> Dict[str, Any]:
95
- """λͺ¨λΈ 정보"""
96
- return {
97
- "model_name": self.model_name,
98
- "display_name": self.display_name,
99
- "description": self.description,
100
- "language": self.language,
101
- "model_size": self.model_size,
102
- "local_path": self.local_path
103
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
lily_llm_api/models/polyglot_ko_1_3b.py DELETED
@@ -1,102 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- Polyglot-ko-1.3b λͺ¨λΈ ν”„λ‘œν•„
4
- """
5
-
6
- from typing import Dict, Any, Tuple
7
- import torch
8
- from transformers import AutoTokenizer, AutoModelForCausalLM
9
- import logging
10
-
11
- logger = logging.getLogger(__name__)
12
-
13
- class PolyglotKo13bProfile:
14
- """Polyglot-ko-1.3b λͺ¨λΈ ν”„λ‘œν•„"""
15
-
16
- def __init__(self):
17
- self.model_name = "EleutherAI/polyglot-ko-1.3b"
18
- self.local_path = "./lily_llm_core/models/polyglot-ko-1.3b"
19
- self.display_name = "Polyglot-ko-1.3b"
20
- self.description = "ν•œκ΅­μ–΄ μ „μš© κ²½λŸ‰ λͺ¨λΈ (1.3B)"
21
- self.language = "ko"
22
- self.model_size = "1.3B"
23
-
24
- def load_model(self) -> Tuple[AutoModelForCausalLM, AutoTokenizer]:
25
- """λͺ¨λΈ λ‘œλ“œ"""
26
- logger.info(f"πŸ“₯ {self.display_name} λͺ¨λΈ λ‘œλ“œ 쀑...")
27
-
28
- try:
29
- # 둜컬 λͺ¨λΈ λ‘œλ“œ
30
- tokenizer = AutoTokenizer.from_pretrained(self.local_path, use_fast=True)
31
-
32
- if tokenizer.pad_token is None:
33
- tokenizer.pad_token = tokenizer.eos_token
34
-
35
- model = AutoModelForCausalLM.from_pretrained(
36
- self.local_path,
37
- torch_dtype=torch.bfloat16,
38
- device_map="cpu",
39
- # low_cpu_mem_usage=True,
40
- trust_remote_code=True,
41
- local_files_only=True,
42
- )
43
-
44
- logger.info(f"βœ… {self.display_name} λͺ¨λΈ λ‘œλ“œ 성곡!")
45
- return model, tokenizer
46
-
47
- except Exception as e:
48
- logger.error(f"❌ {self.display_name} λͺ¨λΈ λ‘œλ“œ μ‹€νŒ¨: {e}")
49
- raise
50
-
51
- def format_prompt(self, user_input: str) -> str:
52
- """ν”„λ‘¬ν”„νŠΈ ν¬λ§·νŒ…"""
53
- # 더 μžμ—°μŠ€λŸ¬μš΄ ν•œκ΅­μ–΄ ν”„λ‘¬ν”„νŠΈ ν˜•μ‹
54
- prompt = f"""λ‹€μŒ μ§ˆλ¬Έμ— λŒ€ν•΄ μΉœμ ˆν•˜κ³  μžμ„Ένžˆ λ‹΅λ³€ν•΄μ£Όμ„Έμš”.
55
-
56
- 질문: {user_input}
57
-
58
- λ‹΅λ³€:"""
59
- return prompt
60
-
61
- def extract_response(self, full_text: str, formatted_prompt: str) -> str:
62
- """응닡 μΆ”μΆœ"""
63
- # "λ‹΅λ³€:" μ΄ν›„μ˜ ν…μŠ€νŠΈλ₯Ό μΆ”μΆœ
64
- if "λ‹΅λ³€:" in full_text:
65
- response = full_text.split("λ‹΅λ³€:")[-1].strip()
66
- else:
67
- # ν”„λ‘¬ν”„νŠΈ 제거
68
- if formatted_prompt in full_text:
69
- response = full_text.replace(formatted_prompt, "").strip()
70
- else:
71
- response = full_text.strip()
72
-
73
- # 빈 μ‘λ‹΅μ΄λ‚˜ μ΄μƒν•œ 문자만 μžˆλŠ” 경우 처리
74
- if not response or len(response.strip()) < 2:
75
- return "μ•ˆλ…•ν•˜μ„Έμš”! 무엇을 λ„μ™€λ“œλ¦΄κΉŒμš”?"
76
-
77
- return response
78
-
79
- def get_generation_config(self) -> Dict[str, Any]:
80
- """생성 μ„€μ •"""
81
- return {
82
- "max_new_tokens": 128,
83
- "temperature": 0.7,
84
- "do_sample": True,
85
- "top_k": 50,
86
- "top_p": 0.9,
87
- "repetition_penalty": 1.1,
88
- "no_repeat_ngram_size": 3,
89
- "pad_token_id": None, # λͺ¨λΈμ—μ„œ μžλ™ μ„€μ •
90
- "eos_token_id": None # λͺ¨λΈμ—μ„œ μžλ™ μ„€μ •
91
- }
92
-
93
- def get_model_info(self) -> Dict[str, Any]:
94
- """λͺ¨λΈ 정보"""
95
- return {
96
- "model_name": self.model_name,
97
- "display_name": self.display_name,
98
- "description": self.description,
99
- "language": self.language,
100
- "model_size": self.model_size,
101
- "local_path": self.local_path
102
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
lily_llm_api/models/polyglot_ko_1_3b_chat.py CHANGED
@@ -8,6 +8,8 @@ from typing import Dict, Any, Tuple
8
  import torch
9
  from transformers import AutoTokenizer, AutoModelForCausalLM
10
  import logging
 
 
11
 
12
  logger = logging.getLogger(__name__)
13
 
@@ -16,38 +18,43 @@ class PolyglotKo13bChatProfile:
16
 
17
  def __init__(self):
18
  self.model_name = "heegyu/polyglot-ko-1.3b-chat"
19
- self.local_path = "./lily_llm_core/models/polyglot-ko-1.3b-chat"
20
  self.display_name = "Polyglot-ko-1.3b-chat"
21
  self.description = "ν•œκ΅­μ–΄ μ±„νŒ… μ „μš© κ²½λŸ‰ λͺ¨λΈ (1.3B)"
22
  self.language = "ko"
23
  self.model_size = "1.3B"
24
 
25
  def load_model(self) -> Tuple[AutoModelForCausalLM, AutoTokenizer]:
26
- """λͺ¨λΈ λ‘œλ“œ"""
27
  logger.info(f"πŸ“₯ {self.display_name} λͺ¨λΈ λ‘œλ“œ 쀑...")
28
-
29
  try:
30
- # 둜컬 λͺ¨λΈ λ‘œλ“œ
31
- tokenizer = AutoTokenizer.from_pretrained(self.local_path, use_fast=True)
32
-
 
 
 
 
 
 
 
 
33
  if tokenizer.pad_token is None:
34
  tokenizer.pad_token = tokenizer.eos_token
35
-
 
 
 
 
36
  model = AutoModelForCausalLM.from_pretrained(
37
- self.local_path,
38
- # torch_dtype=torch.float32,
39
- device_map="cpu",
40
- # low_cpu_mem_usage=True
41
  trust_remote_code=True,
42
- torch_dtype=torch.bfloat16,
43
- local_files_only=True,
44
- )
45
-
46
- # model.to('cpu')
47
-
48
- logger.info(f"βœ… {self.display_name} λͺ¨λΈ λ‘œλ“œ 성곡!")
49
  return model, tokenizer
50
-
51
  except Exception as e:
52
  logger.error(f"❌ {self.display_name} λͺ¨λΈ λ‘œλ“œ μ‹€νŒ¨: {e}")
53
  raise
@@ -100,5 +107,6 @@ class PolyglotKo13bChatProfile:
100
  "description": self.description,
101
  "language": self.language,
102
  "model_size": self.model_size,
103
- "local_path": self.local_path
 
104
  }
 
8
  import torch
9
  from transformers import AutoTokenizer, AutoModelForCausalLM
10
  import logging
11
+ import os
12
+ from pathlib import Path
13
 
14
  logger = logging.getLogger(__name__)
15
 
 
18
 
19
  def __init__(self):
20
  self.model_name = "heegyu/polyglot-ko-1.3b-chat"
21
+ self.local_path = "./lily_llm_core/models/polyglot_ko_1_3b_chat"
22
  self.display_name = "Polyglot-ko-1.3b-chat"
23
  self.description = "ν•œκ΅­μ–΄ μ±„νŒ… μ „μš© κ²½λŸ‰ λͺ¨λΈ (1.3B)"
24
  self.language = "ko"
25
  self.model_size = "1.3B"
26
 
27
  def load_model(self) -> Tuple[AutoModelForCausalLM, AutoTokenizer]:
28
+ """λͺ¨λΈ λ‘œλ“œ (둜컬 μš°μ„ , μ—†μœΌλ©΄ Hub)"""
29
  logger.info(f"πŸ“₯ {self.display_name} λͺ¨λΈ λ‘œλ“œ 쀑...")
 
30
  try:
31
+ use_local = Path(self.local_path).exists() and any(Path(self.local_path).iterdir())
32
+ model_path = self.local_path if use_local else self.model_name
33
+
34
+ logger.info(f"πŸ” λͺ¨λΈ 경둜: {model_path} (local={'yes' if use_local else 'no'})")
35
+
36
+ tokenizer = AutoTokenizer.from_pretrained(
37
+ model_path,
38
+ use_fast=True,
39
+ trust_remote_code=True,
40
+ local_files_only=use_local,
41
+ )
42
  if tokenizer.pad_token is None:
43
  tokenizer.pad_token = tokenizer.eos_token
44
+
45
+ # CPUμ—μ„œλŠ” float32κ°€ 더 μ•ˆμ •μ , CUDAμ—μ„œλŠ” float16 μ‚¬μš©
46
+ device = 'cuda' if torch.cuda.is_available() else 'cpu'
47
+ selected_dtype = torch.float16 if device == 'cuda' else torch.float32
48
+
49
  model = AutoModelForCausalLM.from_pretrained(
50
+ model_path,
 
 
 
51
  trust_remote_code=True,
52
+ torch_dtype=selected_dtype,
53
+ local_files_only=use_local,
54
+ ).to(device)
55
+
56
+ logger.info(f"βœ… {self.display_name} λͺ¨λΈ λ‘œλ“œ 성곡! (device={device}, dtype={selected_dtype})")
 
 
57
  return model, tokenizer
 
58
  except Exception as e:
59
  logger.error(f"❌ {self.display_name} λͺ¨λΈ λ‘œλ“œ μ‹€νŒ¨: {e}")
60
  raise
 
107
  "description": self.description,
108
  "language": self.language,
109
  "model_size": self.model_size,
110
+ "local_path": self.local_path,
111
+ "multimodal": False,
112
  }
lily_llm_api/models/polyglot_ko_5_8b.py DELETED
@@ -1,104 +0,0 @@
1
- #!/usr/bin/env python3
2
-
3
- """
4
- KoAlpaca-Polyglot-5.8B λͺ¨λΈ λ‹€μš΄λ‘œλ“œ
5
- """
6
-
7
- from typing import Dict, Any, Tuple
8
- import torch
9
- from transformers import AutoTokenizer, AutoModelForCausalLM
10
- import logging
11
-
12
- logger = logging.getLogger(__name__)
13
-
14
- class PolyglotKo58bProfile:
15
- """KoAlpaca-Polyglot-5.8B λͺ¨λΈ ν”„λ‘œν•„"""
16
-
17
- def __init__(self):
18
- self.model_name = "beomi/KoAlpaca-Polyglot-5.8B"
19
- self.local_path = "./lily_llm_core/models/koalpaca-polyglot-5.8b"
20
- self.display_name = "KoAlpaca-Polyglot-5.8B"
21
- self.description = "EleutherAI/polyglot-ko-5.8b의 λ―Έμ„Έ μ‘°μ •λœ 버전"
22
- self.language = "ko"
23
- self.model_size = "5.8B"
24
-
25
- def load_model(self) -> Tuple[AutoModelForCausalLM, AutoTokenizer]:
26
- """λͺ¨λΈ λ‘œλ“œ"""
27
- logger.info(f"πŸ“₯ {self.display_name} λͺ¨λΈ λ‘œλ“œ 쀑...")
28
-
29
- try:
30
- # 둜컬 λͺ¨λΈ λ‘œλ“œ
31
- tokenizer = AutoTokenizer.from_pretrained(self.local_path, use_fast=True)
32
-
33
- if tokenizer.pad_token is None:
34
- tokenizer.pad_token = tokenizer.eos_token
35
-
36
- model = AutoModelForCausalLM.from_pretrained(
37
- self.local_path,
38
- torch_dtype=torch.bfloat16,
39
- # torch_dtype=torch.float32,
40
- device_map="cpu",
41
- # low_cpu_mem_usage=True,
42
- trust_remote_code=True,
43
- local_files_only=True,
44
- )
45
-
46
- # model.to('cpu')
47
-
48
- logger.info(f"βœ… {self.display_name} λͺ¨λΈ λ‘œλ“œ 성곡!")
49
- return model, tokenizer
50
-
51
- except Exception as e:
52
- logger.error(f"❌ {self.display_name} λͺ¨λΈ λ‘œλ“œ μ‹€νŒ¨: {e}")
53
- raise
54
-
55
- def format_prompt(self, user_input: str) -> str:
56
- """ν”„λ‘¬ν”„νŠΈ ν¬λ§·νŒ… - μ±„νŒ… ν˜•μ‹"""
57
- # heegyu/polyglot-ko-1.3b-chat λͺ¨λΈμ˜ ꢌμž₯ ν”„λ‘¬ν”„νŠΈ ν˜•μ‹
58
- prompt = f"""당신은 AI μ±—λ΄‡μž…λ‹ˆλ‹€. μ‚¬μš©μžμ—κ²Œ 도움이 되고 μœ μ΅ν•œ λ‚΄μš©μ„ μ œκ³΅ν•΄μ•Όν•©λ‹ˆλ‹€. 닡변은 κΈΈκ³  μžμ„Έν•˜λ©° μΉœμ ˆν•œ μ„€λͺ…을 λ§λΆ™μ—¬μ„œ μž‘μ„±ν•˜μ„Έμš”.
59
-
60
- ### μ‚¬μš©μž:
61
- {user_input}
62
-
63
- ### 챗봇:
64
- """
65
- return prompt
66
-
67
- def extract_response(self, full_text: str, formatted_prompt: str) -> str:
68
- """응닡 μΆ”μΆœ"""
69
- # "### 챗봇:" μ΄ν›„μ˜ ν…μŠ€νŠΈλ₯Ό μΆ”μΆœ
70
- if "### 챗봇:" in full_text:
71
- response = full_text.split("### 챗봇:")[-1].strip()
72
- else:
73
- # ν”„λ‘¬ν”„νŠΈ 제거
74
- if formatted_prompt in full_text:
75
- response = full_text.replace(formatted_prompt, "").strip()
76
- else:
77
- response = full_text.strip()
78
-
79
- return response
80
-
81
- def get_generation_config(self) -> Dict[str, Any]:
82
- """생성 μ„€μ •"""
83
- return {
84
- "max_new_tokens": 128,
85
- "temperature": 0.7,
86
- "do_sample": True,
87
- "top_k": 50,
88
- "top_p": 0.9,
89
- "repetition_penalty": 1.1,
90
- "no_repeat_ngram_size": 3,
91
- "pad_token_id": None, # λͺ¨λΈμ—μ„œ μžλ™ μ„€μ •
92
- "eos_token_id": None # λͺ¨λΈμ—μ„œ μžλ™ μ„€μ •
93
- }
94
-
95
- def get_model_info(self) -> Dict[str, Any]:
96
- """λͺ¨λΈ 정보"""
97
- return {
98
- "model_name": self.model_name,
99
- "display_name": self.display_name,
100
- "description": self.description,
101
- "language": self.language,
102
- "model_size": self.model_size,
103
- "local_path": self.local_path
104
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
lily_llm_api/models/polyglot_ko_5_8b_chat.py CHANGED
@@ -15,35 +15,43 @@ class PolyglotKo58bChatProfile:
15
 
16
  def __init__(self):
17
  self.model_name = "heegyu/polyglot-ko-5.8b-chat"
18
- self.local_path = "./lily_llm_core/models/polyglot-ko-5.8b-chat"
19
  self.display_name = "heegyu/polyglot-ko-5.8b-chat"
20
  self.description = "EleutherAI/polyglot-ko-5.8bλ₯Ό μ—¬λŸ¬ ν•œκ΅­μ–΄ instruction λ°μ΄ν„°μ…‹μœΌλ‘œ ν•™μŠ΅ν•œ λͺ¨λΈ"
21
  self.language = "ko"
22
  self.model_size = "5.8B"
23
 
24
  def load_model(self) -> Tuple[AutoModelForCausalLM, AutoTokenizer]:
25
- """λͺ¨λΈ λ‘œλ“œ"""
26
  logger.info(f"πŸ“₯ {self.display_name} λͺ¨λΈ λ‘œλ“œ 쀑...")
27
-
28
  try:
29
- # 둜컬 λͺ¨λΈ λ‘œλ“œ
30
- tokenizer = AutoTokenizer.from_pretrained(self.local_path, use_fast=True)
31
-
 
 
 
 
 
 
 
 
 
32
  if tokenizer.pad_token is None:
33
  tokenizer.pad_token = tokenizer.eos_token
34
-
 
 
 
35
  model = AutoModelForCausalLM.from_pretrained(
36
- self.local_path,
37
- torch_dtype=torch.bfloat16,
38
- device_map="cpu",
39
- # low_cpu_mem_usage=True,
40
  trust_remote_code=True,
41
- local_files_only=True,
42
- )
43
-
44
- logger.info(f"βœ… {self.display_name} λͺ¨λΈ λ‘œλ“œ 성곡!")
 
45
  return model, tokenizer
46
-
47
  except Exception as e:
48
  logger.error(f"❌ {self.display_name} λͺ¨λΈ λ‘œλ“œ μ‹€νŒ¨: {e}")
49
  raise
@@ -84,5 +92,6 @@ class PolyglotKo58bChatProfile:
84
  "description": self.description,
85
  "language": self.language,
86
  "model_size": self.model_size,
87
- "local_path": self.local_path
 
88
  }
 
15
 
16
  def __init__(self):
17
  self.model_name = "heegyu/polyglot-ko-5.8b-chat"
18
+ self.local_path = "./lily_llm_core/models/polyglot_ko_5_8b_chat"
19
  self.display_name = "heegyu/polyglot-ko-5.8b-chat"
20
  self.description = "EleutherAI/polyglot-ko-5.8bλ₯Ό μ—¬λŸ¬ ν•œκ΅­μ–΄ instruction λ°μ΄ν„°μ…‹μœΌλ‘œ ν•™μŠ΅ν•œ λͺ¨λΈ"
21
  self.language = "ko"
22
  self.model_size = "5.8B"
23
 
24
  def load_model(self) -> Tuple[AutoModelForCausalLM, AutoTokenizer]:
25
+ """λͺ¨λΈ λ‘œλ“œ (둜컬 μš°μ„ , μ—†μœΌλ©΄ Hub)"""
26
  logger.info(f"πŸ“₯ {self.display_name} λͺ¨λΈ λ‘œλ“œ 쀑...")
 
27
  try:
28
+ from pathlib import Path
29
+ use_local = Path(self.local_path).exists() and any(Path(self.local_path).iterdir())
30
+ model_path = self.local_path if use_local else self.model_name
31
+
32
+ logger.info(f"πŸ” λͺ¨λΈ 경둜: {model_path} (local={'yes' if use_local else 'no'})")
33
+
34
+ tokenizer = AutoTokenizer.from_pretrained(
35
+ model_path,
36
+ use_fast=True,
37
+ trust_remote_code=True,
38
+ local_files_only=use_local,
39
+ )
40
  if tokenizer.pad_token is None:
41
  tokenizer.pad_token = tokenizer.eos_token
42
+
43
+ device = 'cuda' if torch.cuda.is_available() else 'cpu'
44
+ selected_dtype = torch.float16 if device == 'cuda' else torch.float32
45
+
46
  model = AutoModelForCausalLM.from_pretrained(
47
+ model_path,
 
 
 
48
  trust_remote_code=True,
49
+ torch_dtype=selected_dtype,
50
+ local_files_only=use_local,
51
+ ).to(device)
52
+
53
+ logger.info(f"βœ… {self.display_name} λͺ¨λΈ λ‘œλ“œ 성곡! (device={device}, dtype={selected_dtype})")
54
  return model, tokenizer
 
55
  except Exception as e:
56
  logger.error(f"❌ {self.display_name} λͺ¨λΈ λ‘œλ“œ μ‹€νŒ¨: {e}")
57
  raise
 
92
  "description": self.description,
93
  "language": self.language,
94
  "model_size": self.model_size,
95
+ "local_path": self.local_path,
96
+ "multimodal": False,
97
  }
lily_llm_core/config.py CHANGED
@@ -36,8 +36,8 @@ class ModelSettings(BaseSettings):
36
 
37
  # λͺ¨λΈλ³„ μ„€μ •
38
  kanana_1_5_v_3b_instruct_model_path: str = Field(default="./models/kanana_1_5_v_3b_instruct", description="Kanana 1.5 v 3b λͺ¨λΈ 경둜")
39
- polyglot_ko_1_3b_chat_model_path: str = Field(default="./models/polyglot-ko-1.3b-chat", description="Polyglot 1.3b λͺ¨λΈ 경둜")
40
- polyglot_ko_5_8b_chat_model_path: str = Field(default="./models/polyglot-ko-5.8b-chat", description="Polyglot 5.8b λͺ¨λΈ 경둜")
41
 
42
  class Config:
43
  env_prefix = "MODEL_"
 
36
 
37
  # λͺ¨λΈλ³„ μ„€μ •
38
  kanana_1_5_v_3b_instruct_model_path: str = Field(default="./models/kanana_1_5_v_3b_instruct", description="Kanana 1.5 v 3b λͺ¨λΈ 경둜")
39
+ polyglot_ko_1_3b_chat_model_path: str = Field(default="./models/polyglot_ko_1_3b_chat", description="Polyglot 1.3b λͺ¨λΈ 경둜")
40
+ polyglot_ko_5_8b_chat_model_path: str = Field(default="./models/polyglot_ko_5_8b_chat", description="Polyglot 5.8b λͺ¨λΈ 경둜")
41
 
42
  class Config:
43
  env_prefix = "MODEL_"
test.py CHANGED
@@ -1,60 +1,82 @@
 
 
 
 
 
1
  import requests
2
  import json
3
- import os # os λͺ¨λ“ˆ μΆ”κ°€
4
- from dotenv import load_dotenv
5
- load_dotenv()
6
-
7
- # 1. ν™˜κ²½ λ³€μˆ˜μ—μ„œ ν—ˆκΉ…νŽ˜μ΄μŠ€ 토큰을 κ°€μ Έμ˜΅λ‹ˆλ‹€.
8
- # ν„°λ―Έλ„μ—μ„œ `set HUGGING_FACE_TOKEN=hf_...` (Windows) λ˜λŠ”
9
- # `export HUGGING_FACE_TOKEN=hf_...` (Mac/Linux) λͺ…λ ΉμœΌλ‘œ 미리 μ„€μ •ν•©λ‹ˆλ‹€.
10
- HF_TOKEN = os.getenv("HF_TOKEN")
11
 
12
- # ν—ˆκΉ…νŽ˜μ΄μŠ€ FastAPI μ„œλ²„ URL
13
- HF_API_BASE = "https://gbrabbit-lily-fast-api.hf.space"
14
-
15
- def test_generate_text():
16
- """ν…μŠ€νŠΈ 생성 ν…ŒμŠ€νŠΈ (인증 μΆ”κ°€)"""
17
- print("\nπŸ” ν…μŠ€νŠΈ 생성 ν…ŒμŠ€νŠΈ...")
 
 
 
 
 
18
 
19
- if not HF_TOKEN:
20
- print("❌ HUGGING_FACE_TOKEN ν™˜κ²½ λ³€μˆ˜κ°€ μ„€μ •λ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€.")
21
- return False
 
22
 
23
- try:
24
- # 2. 인증 토큰을 담을 헀더(headers)λ₯Ό μƒμ„±ν•©λ‹ˆλ‹€.
25
- headers = {
26
- "Authorization": f"Bearer {HF_TOKEN}"
27
- }
28
-
29
- data = {
30
- 'prompt': 'μ•ˆλ…•ν•˜μ„Έμš”! Private μŠ€νŽ˜μ΄μŠ€μ—μ„œ 잘 μ§€λ‚΄μ‹œλ‚˜μš”?',
31
- 'max_length': 20
32
  }
33
 
34
- print(f"πŸ“€ μš”μ²­ 데이터 (Form): {json.dumps(data, ensure_ascii=False)}")
35
-
36
- # 3. requests.post 호좜 μ‹œ headers νŒŒλΌλ―Έν„°λ₯Ό μΆ”κ°€ν•©λ‹ˆλ‹€.
37
- response = requests.post(
38
- f"{HF_API_BASE}/generate",
39
- headers=headers, # <<-- 인증 헀더 μΆ”κ°€!
40
- data=data,
41
- timeout=2000
42
- )
43
-
44
- print(f"βœ… μƒνƒœ μ½”λ“œ: {response.status_code}") # 이제 200이 ν‘œμ‹œλ  κ²ƒμž…λ‹ˆλ‹€.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
 
46
  if response.status_code == 200:
47
  result = response.json()
48
- print(f"βœ… 응닡: {json.dumps(result, indent=2, ensure_ascii=False)}")
49
- else:
50
- print(f"❌ 응닡: {response.text}")
51
-
52
- return response.status_code == 200
53
-
54
  except Exception as e:
55
- print(f"❌ ν…μŠ€νŠΈ 생성 ν…ŒμŠ€νŠΈ μ‹€νŒ¨: {e}")
56
- return False
57
 
58
- # 슀크립트 μ‹€ν–‰
59
  if __name__ == "__main__":
60
- test_generate_text()
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ κ°„λ‹¨ν•œ API ν…ŒμŠ€νŠΈ 슀크립트 (μ΅œμ’… μˆ˜μ •λ³Έ)
4
+ """
5
+
6
  import requests
7
  import json
 
 
 
 
 
 
 
 
8
 
9
+ def test_api():
10
+ """API ν…ŒμŠ€νŠΈ"""
11
+ url = "http://localhost:8001/generate"
12
+
13
+ test_prompts = [
14
+ "μ•ˆλ…•ν•˜μ„Έμš”!",
15
+ # "였늘 기뢄이 μ–΄λ•Œμš”?",
16
+ # "κ°„λ‹¨ν•œ μžκΈ°μ†Œκ°œλ₯Ό ν•΄μ£Όμ„Έμš”",
17
+ # "ν”„λ‘œκ·Έλž˜λ°μ΄λž€ λ¬΄μ—‡μΈκ°€μš”?",
18
+ # "날씨가 μ’‹λ„€μš”"
19
+ ]
20
 
21
+ for i, prompt in enumerate(test_prompts, 1):
22
+ print(f"\n{'='*50}")
23
+ print(f"ν…ŒμŠ€νŠΈ {i}: {prompt}")
24
+ print(f"{'='*50}")
25
 
26
+ # API μš”μ²­ - Form 데이터 ν˜•μ‹μœΌλ‘œ 전솑
27
+ payload = {
28
+ "prompt": prompt,
29
+ "max_length": 20, # 더 짧게
30
+ "temperature": 0.8, # 더 λ†’κ²Œ
31
+ "top_p": 0.95, # 더 λ†’κ²Œ
32
+ "do_sample": True
 
 
33
  }
34
 
35
+ try:
36
+ # json= 인자λ₯Ό data= 둜 λ³€κ²½ν•˜μ—¬ Form λ°μ΄ν„°λ‘œ 전솑
37
+ response = requests.post(url, data=payload, timeout=600) # ν…μŠ€νŠΈ 생성 μ‹œκ°„μ„ κ³ λ €ν•΄ νƒ€μž„μ•„μ›ƒ 증가
38
+
39
+ if response.status_code == 200:
40
+ result = response.json()
41
+ print(f"βœ… 성곡!")
42
+ print(f"πŸ“ μƒμ„±λœ ν…μŠ€νŠΈ: '{result['generated_text']}'")
43
+ print(f"⏱️ 처리 μ‹œκ°„: {result['processing_time']:.2f}초")
44
+ print(f"πŸ€– λͺ¨λΈ: {result['model_name']}")
45
+ else:
46
+ print(f"❌ 였λ₯˜: {response.status_code}")
47
+ print(f"πŸ“„ 응닡: {response.text}")
48
+
49
+ except Exception as e:
50
+ print(f"❌ μš”μ²­ μ‹€νŒ¨: {e}")
51
+
52
+ def test_raw_response():
53
+ """μ›μ‹œ 응닡 확인"""
54
+ url = "http://localhost:8001/generate"
55
+
56
+ payload = {
57
+ "prompt": "Hello",
58
+ "max_length": 20,
59
+ "temperature": 1.0,
60
+ "top_p": 1.0,
61
+ "do_sample": True
62
+ }
63
+
64
+ try:
65
+ response = requests.post(url, json=payload, timeout=30)
66
+ print(f"\nπŸ” μ›μ‹œ 응닡 확인:")
67
+ print(f"μƒνƒœ μ½”λ“œ: {response.status_code}")
68
+ print(f"응닡 헀더: {dict(response.headers)}")
69
+ print(f"응닡 λ‚΄μš©: {response.text}")
70
 
71
  if response.status_code == 200:
72
  result = response.json()
73
+ print(f"νŒŒμ‹±λœ JSON: {json.dumps(result, indent=2, ensure_ascii=False)}")
74
+
 
 
 
 
75
  except Exception as e:
76
+ print(f"❌ μ›μ‹œ 응닡 확인 μ‹€νŒ¨: {e}")
 
77
 
 
78
  if __name__ == "__main__":
79
+ print("πŸ§ͺ Lily LLM API ν…ŒμŠ€νŠΈ μ‹œμž‘")
80
+ test_api()
81
+ # test_raw_response()
82
+ print("\nβœ… ν…ŒμŠ€νŠΈ μ™„λ£Œ!")
test_hf_with_token.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ import json
3
+ import os # os λͺ¨λ“ˆ μΆ”κ°€
4
+ from dotenv import load_dotenv
5
+ load_dotenv()
6
+
7
+ # 1. ν™˜κ²½ λ³€μˆ˜μ—μ„œ ν—ˆκΉ…νŽ˜μ΄μŠ€ 토큰을 κ°€μ Έμ˜΅λ‹ˆλ‹€.
8
+ # ν„°λ―Έλ„μ—μ„œ `set HUGGING_FACE_TOKEN=hf_...` (Windows) λ˜λŠ”
9
+ # `export HUGGING_FACE_TOKEN=hf_...` (Mac/Linux) λͺ…λ ΉμœΌλ‘œ 미리 μ„€μ •ν•©λ‹ˆλ‹€.
10
+ HF_TOKEN = os.getenv("HF_TOKEN")
11
+
12
+ # ν—ˆκΉ…νŽ˜μ΄μŠ€ FastAPI μ„œλ²„ URL
13
+ HF_API_BASE = "https://gbrabbit-lily-fast-api.hf.space"
14
+
15
+ def test_generate_text():
16
+ """ν…μŠ€νŠΈ 생성 ν…ŒμŠ€νŠΈ (인증 μΆ”κ°€)"""
17
+ print("\nπŸ” ν…μŠ€νŠΈ 생성 ν…ŒμŠ€νŠΈ...")
18
+
19
+ if not HF_TOKEN:
20
+ print("❌ HUGGING_FACE_TOKEN ν™˜κ²½ λ³€μˆ˜κ°€ μ„€μ •λ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€.")
21
+ return False
22
+
23
+ try:
24
+ # 2. 인증 토큰을 담을 헀더(headers)λ₯Ό μƒμ„±ν•©λ‹ˆλ‹€.
25
+ headers = {
26
+ "Authorization": f"Bearer {HF_TOKEN}"
27
+ }
28
+
29
+ data = {
30
+ 'prompt': 'μ•ˆλ…•ν•˜μ„Έμš”! Private μŠ€νŽ˜μ΄μŠ€μ—μ„œ 잘 μ§€λ‚΄μ‹œλ‚˜μš”?',
31
+ 'max_length': 20
32
+ }
33
+
34
+ print(f"πŸ“€ μš”μ²­ 데이터 (Form): {json.dumps(data, ensure_ascii=False)}")
35
+
36
+ # 3. requests.post 호좜 μ‹œ headers νŒŒλΌλ―Έν„°λ₯Ό μΆ”κ°€ν•©λ‹ˆλ‹€.
37
+ response = requests.post(
38
+ f"{HF_API_BASE}/generate",
39
+ headers=headers, # <<-- 인증 헀더 μΆ”κ°€!
40
+ data=data,
41
+ timeout=2000
42
+ )
43
+
44
+ print(f"βœ… μƒνƒœ μ½”λ“œ: {response.status_code}") # 이제 200이 ν‘œμ‹œλ  κ²ƒμž…λ‹ˆλ‹€.
45
+
46
+ if response.status_code == 200:
47
+ result = response.json()
48
+ print(f"βœ… 응닡: {json.dumps(result, indent=2, ensure_ascii=False)}")
49
+ else:
50
+ print(f"❌ 응닡: {response.text}")
51
+
52
+ return response.status_code == 200
53
+
54
+ except Exception as e:
55
+ print(f"❌ ν…μŠ€νŠΈ 생성 ν…ŒμŠ€νŠΈ μ‹€νŒ¨: {e}")
56
+ return False
57
+
58
+ # 슀크립트 μ‹€ν–‰
59
+ if __name__ == "__main__":
60
+ test_generate_text()
test_log.md ADDED
@@ -0,0 +1,165 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ (lily_llm_env) C:\Project\lily_generate_project\lily_generate_package>python test.py
3
+ πŸ§ͺ Lily LLM API ν…ŒμŠ€νŠΈ μ‹œμž‘
4
+
5
+ ==================================================
6
+ ν…ŒμŠ€νŠΈ 1: μ•ˆλ…•ν•˜μ„Έμš”!
7
+ ==================================================
8
+ βœ… 성곡!
9
+ πŸ“ μƒμ„±λœ ν…μŠ€νŠΈ: 'Hello! How can I assist you today?'
10
+ ⏱️ 처리 μ‹œκ°„: 154.13초
11
+ πŸ€– λͺ¨λΈ: kanana-1.5-v-3b-instruct
12
+
13
+ βœ… ν…ŒμŠ€νŠΈ μ™„λ£Œ!
14
+
15
+ (lily_llm_env) C:\Project\lily_generate_project\lily_generate_package>python test.py
16
+ πŸ§ͺ Lily LLM API ν…ŒμŠ€νŠΈ μ‹œμž‘
17
+
18
+ ==================================================
19
+ ν…ŒμŠ€νŠΈ 1: μ•ˆλ…•ν•˜μ„Έμš”!
20
+ ==================================================
21
+ βœ… 성곡!
22
+ πŸ“ μƒμ„±λœ ν…μŠ€νŠΈ: 'Hello! How can I assist you today? We're here to help with any questions or tasks you'
23
+ ⏱️ 처리 μ‹œκ°„: 217.69초
24
+ πŸ€– λͺ¨λΈ: kanana-1.5-v-3b-instruct
25
+
26
+
27
+
28
+
29
+
30
+ -----
31
+
32
+
33
+
34
+
35
+ INFO: 127.0.0.1:62794 - "POST /generate HTTP/1.1" 500 Internal Server Error
36
+ ERROR: Exception in ASGI application
37
+ Traceback (most recent call last):
38
+ File "c:\Project\lily_generate_project\lily_generate_package\lily_llm_env\Lib\site-packages\uvicorn\protocols\http\httptools_impl.py", line 409, in run_asgi
39
+ result = await app( # type: ignore[func-returns-value]
40
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
41
+ File "c:\Project\lily_generate_project\lily_generate_package\lily_llm_env\Lib\site-packages\uvicorn\middleware\proxy_headers.py", line 60, in __call__
42
+ return await self.app(scope, receive, send)
43
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
44
+ File "c:\Project\lily_generate_project\lily_generate_package\lily_llm_env\Lib\site-packages\fastapi\applications.py", line 1054, in __call__
45
+ await super().__call__(scope, receive, send)
46
+ File "c:\Project\lily_generate_project\lily_generate_package\lily_llm_env\Lib\site-packages\starlette\applications.py", line 113, in __call__
47
+ await self.middleware_stack(scope, receive, send)
48
+ File "c:\Project\lily_generate_project\lily_generate_package\lily_llm_env\Lib\site-packages\starlette\middleware\errors.py", line 186, in __call__
49
+ raise exc
50
+ File "c:\Project\lily_generate_project\lily_generate_package\lily_llm_env\Lib\site-packages\starlette\middleware\errors.py", line 164, in __call__
51
+ await self.app(scope, receive, _send)
52
+ File "c:\Project\lily_generate_project\lily_generate_package\lily_llm_env\Lib\site-packages\starlette\middleware\cors.py", line 85, in __call__
53
+ await self.app(scope, receive, send)
54
+ File "c:\Project\lily_generate_project\lily_generate_package\lily_llm_env\Lib\site-packages\starlette\middleware\exceptions.py", line 63, in __call__
55
+ await wrap_app_handling_exceptions(self.app, conn)(scope, receive, send)
56
+ File "c:\Project\lily_generate_project\lily_generate_package\lily_llm_env\Lib\site-packages\starlette\_exception_handler.py", line 53, in wrapped_app
57
+ raise exc
58
+ File "c:\Project\lily_generate_project\lily_generate_package\lily_llm_env\Lib\site-packages\starlette\_exception_handler.py", line 42, in wrapped_app
59
+ await app(scope, receive, sender)
60
+ File "c:\Project\lily_generate_project\lily_generate_package\lily_llm_env\Lib\site-packages\starlette\routing.py", line 716, in __call__
61
+ await self.middleware_stack(scope, receive, send)
62
+ File "c:\Project\lily_generate_project\lily_generate_package\lily_llm_env\Lib\site-packages\starlette\routing.py", line 736, in app
63
+ await route.handle(scope, receive, send)
64
+ File "c:\Project\lily_generate_project\lily_generate_package\lily_llm_env\Lib\site-packages\starlette\routing.py", line 290, in handle
65
+ await self.app(scope, receive, send)
66
+ File "c:\Project\lily_generate_project\lily_generate_package\lily_llm_env\Lib\site-packages\starlette\routing.py", line 78, in app
67
+ await wrap_app_handling_exceptions(app, request)(scope, receive, send)
68
+ File "c:\Project\lily_generate_project\lily_generate_package\lily_llm_env\Lib\site-packages\starlette\_exception_handler.py", line 53, in wrapped_app
69
+ raise exc
70
+ File "c:\Project\lily_generate_project\lily_generate_package\lily_llm_env\Lib\site-packages\starlette\_exception_handler.py", line 42, in wrapped_app
71
+ await app(scope, receive, sender)
72
+ File "c:\Project\lily_generate_project\lily_generate_package\lily_llm_env\Lib\site-packages\starlette\routing.py", line 75, in app
73
+ response = await f(request)
74
+ ^^^^^^^^^^^^^^^^
75
+ File "c:\Project\lily_generate_project\lily_generate_package\lily_llm_env\Lib\site-packages\fastapi\routing.py", line 302, in app
76
+ raw_response = await run_endpoint_function(
77
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
78
+ File "c:\Project\lily_generate_project\lily_generate_package\lily_llm_env\Lib\site-packages\fastapi\routing.py", line 213, in run_endpoint_function
79
+ return await dependant.call(**values)
80
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
81
+ File "C:\Project\lily_generate_project\lily_generate_package\lily_llm_api\app_v2.py", line 372, in generate
82
+ result = await loop.run_in_executor(
83
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^
84
+ File "C:\Users\gigab\AppData\Local\Programs\Python\Python311\Lib\concurrent\futures\thread.py", line 58, in run
85
+ result = self.fn(*self.args, **self.kwargs)
86
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
87
+ File "C:\Project\lily_generate_project\lily_generate_package\lily_llm_api\app_v2.py", line 303, in generate_sync
88
+ inputs = tokenizer.encode_prompt(prompt=formatted_prompt, image_meta=combined_image_metas)
89
+ ^^^^^^^^^^^^^^^^^^^^^^^
90
+ File "c:\Project\lily_generate_project\lily_generate_package\lily_llm_env\Lib\site-packages\transformers\tokenization_utils_base.py", line 1099, in __getattr__
91
+ raise AttributeError(f"{self.__class__.__name__} has no attribute {key}")
92
+ AttributeError: PreTrainedTokenizerFast has no attribute encode_prompt
93
+
94
+
95
+
96
+
97
+ ---
98
+
99
+
100
+
101
+
102
+
103
+ (lily_llm_env) C:\Project\lily_generate_project\lily_generate_package>python test.py
104
+ πŸ§ͺ Lily LLM API ν…ŒμŠ€νŠΈ μ‹œμž‘
105
+
106
+ ==================================================
107
+ ν…ŒμŠ€νŠΈ 1: μ•ˆλ…•ν•˜μ„Έμš”!
108
+ ==================================================
109
+ βœ… 성곡!
110
+ πŸ“ μƒμ„±λœ ν…μŠ€νŠΈ: 'Hello! How can I assist you today? We're here to help with any questions or tasks you'
111
+ ⏱️ 처리 μ‹œκ°„: 217.69초
112
+ πŸ€– λͺ¨λΈ: kanana-1.5-v-3b-instruct
113
+
114
+ βœ… ν…ŒμŠ€νŠΈ μ™„λ£Œ!
115
+
116
+ (lily_llm_env) C:\Project\lily_generate_project\lily_generate_package>python test.py
117
+ πŸ§ͺ Lily LLM API ν…ŒμŠ€νŠΈ μ‹œμž‘
118
+
119
+ ==================================================
120
+ ν…ŒμŠ€νŠΈ 1: μ•ˆλ…•ν•˜μ„Έμš”!
121
+ ==================================================
122
+ βœ… 성곡!
123
+ πŸ“ μƒμ„±λœ ν…μŠ€νŠΈ: '"μ•ˆλ…•ν•˜μ„Έμš”!"
124
+
125
+ 인사: μ•ˆλ…•ν•˜μ‹­λ‹ˆκΉŒ?
126
+
127
+ 질문: "제'
128
+ ⏱️ 처리 μ‹œκ°„: 20.50초
129
+ πŸ€– λͺ¨λΈ: Polyglot-ko-1.3b-chat
130
+
131
+ βœ… ν…ŒμŠ€νŠΈ μ™„λ£Œ!
132
+
133
+ ---
134
+
135
+
136
+
137
+ (lily_llm_env) C:\Project\lily_generate_project\lily_generate_package>python test.py
138
+ πŸ§ͺ Lily LLM API ν…ŒμŠ€νŠΈ μ‹œμž‘
139
+
140
+ ==================================================
141
+ ν…ŒμŠ€νŠΈ 1: μ•ˆλ…•ν•˜μ„Έμš”!
142
+ ==================================================
143
+ βœ… 성곡!
144
+ πŸ“ μƒμ„±λœ ν…μŠ€νŠΈ: '&&...
145
+
146
+ μ•ˆλ…•ν•˜μ„Έμš”, μ €λŠ” Alisterμž…λ‹ˆλ‹€'
147
+ ⏱️ 처리 μ‹œκ°„: 17.73초
148
+ πŸ€– λͺ¨λΈ: Polyglot-ko-1.3b-chat
149
+
150
+ βœ… ν…ŒμŠ€νŠΈ μ™„λ£Œ!
151
+
152
+
153
+ --
154
+
155
+
156
+
157
+
158
+
159
+
160
+
161
+
162
+
163
+
164
+
165
+