gbrabbit committed on
Commit
88e4071
·
1 Parent(s): acb109a

Auto commit at 2025-08-25 19:01:34

Browse files
lily_llm_api/services/generation_service.py CHANGED
@@ -152,7 +152,7 @@ def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_lengt
152
 
153
  # 2) μ—¬μ „νžˆ μ—†μœΌλ©΄ μ„Έμ…˜ μΊμ‹œμ—μ„œ 볡ꡬ
154
  if (not all_image_data or len([img for img in all_image_data if img]) == 0) and session_id and session_id in _session_image_cache and len(_session_image_cache[session_id]) > 0:
155
- cached_imgs = _session_image_cache[session_id]
156
  all_image_data.extend(cached_imgs)
157
  print(f"πŸ” [DEBUG] μ„Έμ…˜ μΊμ‹œμ—μ„œ 이전 이미지 {len(cached_imgs)}개 볡ꡬ (μ„Έμ…˜: {session_id})")
158
  else:
@@ -493,7 +493,7 @@ def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_lengt
493
 
494
  print(f"πŸ” [DEBUG] ν”„λ‘¬ν”„νŠΈ ꡬ성 μ™„λ£Œ - 길이: {len(formatted_prompt) if formatted_prompt else 0}")
495
  if debug_log_prompt:
496
- print(f"πŸ” [DEBUG] μ΅œμ’… ν”„λ‘¬ν”„νŠΈ: {formatted_prompt}")
497
 
498
  # --- 3. ν† ν¬λ‚˜μ΄μ§• ---
499
  print(f"πŸ” [DEBUG] ν† ν¬λ‚˜μ΄μ§• μ‹œμž‘")
@@ -891,11 +891,11 @@ def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_lengt
891
 
892
  # πŸ”’ μ•ˆμ „ κ°€λ“œ: image_token_thwκ°€ 비정상일 λ•Œ -1 토큰이 μƒμ„±λ˜μ§€ μ•Šλ„λ‘ λ°©μ§€
893
  try:
894
- if 'image_token_thw' in processed_image_metas:
895
- it = processed_image_metas['image_token_thw']
896
- if isinstance(it, torch.Tensor) and (it.numel() == 0 or it.shape[-1] != 3):
897
- print(f"⚠️ [DEBUG] image_token_thw 비정상: {it.shape if hasattr(it,'shape') else type(it)} -> μ•ˆμ „ κΈ°λ³Έκ°’ 적용")
898
- processed_image_metas['image_token_thw'] = torch.tensor([[1,1,1]] * len(all_pixel_values), dtype=torch.long).unsqueeze(0)
899
  except Exception as _safe_e:
900
  print(f"⚠️ [DEBUG] image_token_thw μ•ˆμ „ν™” μ‹€νŒ¨: {_safe_e}")
901
 
@@ -1062,37 +1062,37 @@ def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_lengt
1062
 
1063
  import torch as _torch
1064
  with _torch.inference_mode():
1065
- generated_ids = lora_model.generate(
1066
- **lora_inputs,
1067
- **gen_config
1068
- )
1069
  else:
1070
  print(f"⚠️ [DEBUG] LoRA λͺ¨λΈμ„ κ°€μ Έμ˜¬ 수 μ—†μŒ, κΈ°λ³Έ λͺ¨λΈ μ‚¬μš©")
1071
  import torch as _torch
1072
  with _torch.inference_mode():
 
 
 
 
 
 
 
 
 
1073
  generated_ids = current_model.generate(
1074
  input_ids=input_ids,
1075
  attention_mask=attention_mask,
1076
  **gen_config
1077
  )
1078
- else:
1079
- print(f"πŸ” [DEBUG] LoRA μ–΄λŒ‘ν„° μ—†μŒ, κΈ°λ³Έ λͺ¨λΈ μ‚¬μš©")
1080
- import torch as _torch
1081
- with _torch.inference_mode():
1082
  generated_ids = current_model.generate(
1083
  input_ids=input_ids,
1084
  attention_mask=attention_mask,
1085
  **gen_config
1086
  )
1087
- except ImportError:
1088
- print(f"πŸ” [DEBUG] LoRA 지원 μ•ˆλ¨, κΈ°λ³Έ λͺ¨λΈ μ‚¬μš©")
1089
- import torch as _torch
1090
- with _torch.inference_mode():
1091
- generated_ids = current_model.generate(
1092
- input_ids=input_ids,
1093
- attention_mask=attention_mask,
1094
- **gen_config
1095
- )
1096
 
1097
  print(f"πŸ” [DEBUG] λͺ¨λΈ 생성 μ™„λ£Œ μ‹œκ°„: {time.time()}")
1098
 
@@ -1119,9 +1119,9 @@ def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_lengt
1119
  # μƒμ„±λœ ν…μŠ€νŠΈ λ””μ½”λ”©
1120
  full_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
1121
  if os.getenv('LILY_DEBUG_LOG_TEXT', '0') == '1':
1122
- print(f"πŸ” [DEBUG] 전체 ν…μŠ€νŠΈ 길이: {len(full_text)}")
1123
- print(f"πŸ” [DEBUG] 전체 생성 ν…μŠ€νŠΈ (Raw): \n---\n{full_text}\n---")
1124
- print(f"πŸ” [DEBUG] μ‚¬μš©λœ ν”„λ‘¬ν”„νŠΈ: {formatted_prompt}")
1125
 
1126
  # ν”„λ‘œν•„λ³„ 응닡 μΆ”μΆœ (μ•ˆμ „ν•œ 방식)
1127
  if hasattr(current_profile, 'extract_response'):
 
152
 
153
  # 2) μ—¬μ „νžˆ μ—†μœΌλ©΄ μ„Έμ…˜ μΊμ‹œμ—μ„œ 볡ꡬ
154
  if (not all_image_data or len([img for img in all_image_data if img]) == 0) and session_id and session_id in _session_image_cache and len(_session_image_cache[session_id]) > 0:
155
+ cached_imgs = _session_image_cache[session_id]
156
  all_image_data.extend(cached_imgs)
157
  print(f"πŸ” [DEBUG] μ„Έμ…˜ μΊμ‹œμ—μ„œ 이전 이미지 {len(cached_imgs)}개 볡ꡬ (μ„Έμ…˜: {session_id})")
158
  else:
 
493
 
494
  print(f"πŸ” [DEBUG] ν”„λ‘¬ν”„νŠΈ ꡬ성 μ™„λ£Œ - 길이: {len(formatted_prompt) if formatted_prompt else 0}")
495
  if debug_log_prompt:
496
+ print(f"πŸ” [DEBUG] μ΅œμ’… ν”„λ‘¬ν”„νŠΈ: {formatted_prompt}")
497
 
498
  # --- 3. ν† ν¬λ‚˜μ΄μ§• ---
499
  print(f"πŸ” [DEBUG] ν† ν¬λ‚˜μ΄μ§• μ‹œμž‘")
 
891
 
892
  # πŸ”’ μ•ˆμ „ κ°€λ“œ: image_token_thwκ°€ 비정상일 λ•Œ -1 토큰이 μƒμ„±λ˜μ§€ μ•Šλ„λ‘ λ°©μ§€
893
  try:
894
+ if 'image_token_thw' in processed_image_metas:
895
+ it = processed_image_metas['image_token_thw']
896
+ if isinstance(it, torch.Tensor) and (it.numel() == 0 or it.shape[-1] != 3):
897
+ print(f"⚠️ [DEBUG] image_token_thw 비정상: {it.shape if hasattr(it,'shape') else type(it)} -> μ•ˆμ „ κΈ°λ³Έκ°’ 적용")
898
+ processed_image_metas['image_token_thw'] = torch.tensor([[1,1,1]] * len(all_pixel_values), dtype=torch.long).unsqueeze(0)
899
  except Exception as _safe_e:
900
  print(f"⚠️ [DEBUG] image_token_thw μ•ˆμ „ν™” μ‹€νŒ¨: {_safe_e}")
901
 
 
1062
 
1063
  import torch as _torch
1064
  with _torch.inference_mode():
1065
+ generated_ids = lora_model.generate(
1066
+ **lora_inputs,
1067
+ **gen_config
1068
+ )
1069
  else:
1070
  print(f"⚠️ [DEBUG] LoRA λͺ¨λΈμ„ κ°€μ Έμ˜¬ 수 μ—†μŒ, κΈ°λ³Έ λͺ¨λΈ μ‚¬μš©")
1071
  import torch as _torch
1072
  with _torch.inference_mode():
1073
+ generated_ids = current_model.generate(
1074
+ input_ids=input_ids,
1075
+ attention_mask=attention_mask,
1076
+ **gen_config
1077
+ )
1078
+ else:
1079
+ print(f"πŸ” [DEBUG] LoRA μ–΄λŒ‘ν„° μ—†μŒ, κΈ°λ³Έ λͺ¨λΈ μ‚¬μš©")
1080
+ import torch as _torch
1081
+ with _torch.inference_mode():
1082
  generated_ids = current_model.generate(
1083
  input_ids=input_ids,
1084
  attention_mask=attention_mask,
1085
  **gen_config
1086
  )
1087
+ except ImportError:
1088
+ print(f"πŸ” [DEBUG] LoRA 지원 μ•ˆλ¨, κΈ°λ³Έ λͺ¨λΈ μ‚¬μš©")
1089
+ import torch as _torch
1090
+ with _torch.inference_mode():
1091
  generated_ids = current_model.generate(
1092
  input_ids=input_ids,
1093
  attention_mask=attention_mask,
1094
  **gen_config
1095
  )
 
 
 
 
 
 
 
 
 
1096
 
1097
  print(f"πŸ” [DEBUG] λͺ¨λΈ 생성 μ™„λ£Œ μ‹œκ°„: {time.time()}")
1098
 
 
1119
  # μƒμ„±λœ ν…μŠ€νŠΈ λ””μ½”λ”©
1120
  full_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
1121
  if os.getenv('LILY_DEBUG_LOG_TEXT', '0') == '1':
1122
+ print(f"πŸ” [DEBUG] 전체 ν…μŠ€νŠΈ 길이: {len(full_text)}")
1123
+ print(f"πŸ” [DEBUG] 전체 생성 ν…μŠ€νŠΈ (Raw): \n---\n{full_text}\n---")
1124
+ print(f"πŸ” [DEBUG] μ‚¬μš©λœ ν”„λ‘¬ν”„νŠΈ: {formatted_prompt}")
1125
 
1126
  # ν”„λ‘œν•„λ³„ 응닡 μΆ”μΆœ (μ•ˆμ „ν•œ 방식)
1127
  if hasattr(current_profile, 'extract_response'):