gbrabbit committed on
Commit
88e4071
·
1 Parent(s): acb109a

Auto commit at 2025-08-25 19:01:34

Browse files
lily_llm_api/services/generation_service.py CHANGED
@@ -152,7 +152,7 @@ def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_lengt
152
 
153
  # 2) μ—¬μ „νžˆ μ—†μœΌλ©΄ μ„Έμ…˜ μΊμ‹œμ—μ„œ 볡ꡬ
154
  if (not all_image_data or len([img for img in all_image_data if img]) == 0) and session_id and session_id in _session_image_cache and len(_session_image_cache[session_id]) > 0:
155
- cached_imgs = _session_image_cache[session_id]
156
  all_image_data.extend(cached_imgs)
157
  print(f"πŸ” [DEBUG] μ„Έμ…˜ μΊμ‹œμ—μ„œ 이전 이미지 {len(cached_imgs)}개 볡ꡬ (μ„Έμ…˜: {session_id})")
158
  else:
@@ -493,7 +493,7 @@ def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_lengt
493
 
494
  print(f"πŸ” [DEBUG] ν”„λ‘¬ν”„νŠΈ ꡬ성 μ™„λ£Œ - 길이: {len(formatted_prompt) if formatted_prompt else 0}")
495
  if debug_log_prompt:
496
- print(f"πŸ” [DEBUG] μ΅œμ’… ν”„λ‘¬ν”„νŠΈ: {formatted_prompt}")
497
 
498
  # --- 3. ν† ν¬λ‚˜μ΄μ§• ---
499
  print(f"πŸ” [DEBUG] ν† ν¬λ‚˜μ΄μ§• μ‹œμž‘")
@@ -891,11 +891,11 @@ def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_lengt
891
 
892
  # πŸ”’ μ•ˆμ „ κ°€λ“œ: image_token_thwκ°€ 비정상일 λ•Œ -1 토큰이 μƒμ„±λ˜μ§€ μ•Šλ„λ‘ λ°©μ§€
893
  try:
894
- if 'image_token_thw' in processed_image_metas:
895
- it = processed_image_metas['image_token_thw']
896
- if isinstance(it, torch.Tensor) and (it.numel() == 0 or it.shape[-1] != 3):
897
- print(f"⚠️ [DEBUG] image_token_thw 비정상: {it.shape if hasattr(it,'shape') else type(it)} -> μ•ˆμ „ κΈ°λ³Έκ°’ 적용")
898
- processed_image_metas['image_token_thw'] = torch.tensor([[1,1,1]] * len(all_pixel_values), dtype=torch.long).unsqueeze(0)
899
  except Exception as _safe_e:
900
  print(f"⚠️ [DEBUG] image_token_thw μ•ˆμ „ν™” μ‹€νŒ¨: {_safe_e}")
901
 
@@ -1062,37 +1062,37 @@ def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_lengt
1062
 
1063
  import torch as _torch
1064
  with _torch.inference_mode():
1065
- generated_ids = lora_model.generate(
1066
- **lora_inputs,
1067
- **gen_config
1068
- )
1069
  else:
1070
  print(f"⚠️ [DEBUG] LoRA λͺ¨λΈμ„ κ°€μ Έμ˜¬ 수 μ—†μŒ, κΈ°λ³Έ λͺ¨λΈ μ‚¬μš©")
1071
  import torch as _torch
1072
  with _torch.inference_mode():
 
 
 
 
 
 
 
 
 
1073
  generated_ids = current_model.generate(
1074
  input_ids=input_ids,
1075
  attention_mask=attention_mask,
1076
  **gen_config
1077
  )
1078
- else:
1079
- print(f"πŸ” [DEBUG] LoRA μ–΄λŒ‘ν„° μ—†μŒ, κΈ°λ³Έ λͺ¨λΈ μ‚¬μš©")
1080
- import torch as _torch
1081
- with _torch.inference_mode():
1082
  generated_ids = current_model.generate(
1083
  input_ids=input_ids,
1084
  attention_mask=attention_mask,
1085
  **gen_config
1086
  )
1087
- except ImportError:
1088
- print(f"πŸ” [DEBUG] LoRA 지원 μ•ˆλ¨, κΈ°λ³Έ λͺ¨λΈ μ‚¬μš©")
1089
- import torch as _torch
1090
- with _torch.inference_mode():
1091
- generated_ids = current_model.generate(
1092
- input_ids=input_ids,
1093
- attention_mask=attention_mask,
1094
- **gen_config
1095
- )
1096
 
1097
  print(f"πŸ” [DEBUG] λͺ¨λΈ 생성 μ™„λ£Œ μ‹œκ°„: {time.time()}")
1098
 
@@ -1119,9 +1119,9 @@ def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_lengt
1119
  # μƒμ„±λœ ν…μŠ€νŠΈ λ””μ½”λ”©
1120
  full_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
1121
  if os.getenv('LILY_DEBUG_LOG_TEXT', '0') == '1':
1122
- print(f"πŸ” [DEBUG] 전체 ν…μŠ€νŠΈ 길이: {len(full_text)}")
1123
- print(f"πŸ” [DEBUG] 전체 생성 ν…μŠ€νŠΈ (Raw): \n---\n{full_text}\n---")
1124
- print(f"πŸ” [DEBUG] μ‚¬μš©λœ ν”„λ‘¬ν”„νŠΈ: {formatted_prompt}")
1125
 
1126
  # ν”„λ‘œν•„λ³„ 응닡 μΆ”μΆœ (μ•ˆμ „ν•œ 방식)
1127
  if hasattr(current_profile, 'extract_response'):
 
152
 
153
  # 2) μ—¬μ „νžˆ μ—†μœΌλ©΄ μ„Έμ…˜ μΊμ‹œμ—μ„œ 볡ꡬ
154
  if (not all_image_data or len([img for img in all_image_data if img]) == 0) and session_id and session_id in _session_image_cache and len(_session_image_cache[session_id]) > 0:
155
+ cached_imgs = _session_image_cache[session_id]
156
  all_image_data.extend(cached_imgs)
157
  print(f"πŸ” [DEBUG] μ„Έμ…˜ μΊμ‹œμ—μ„œ 이전 이미지 {len(cached_imgs)}개 볡ꡬ (μ„Έμ…˜: {session_id})")
158
  else:
 
493
 
494
  print(f"πŸ” [DEBUG] ν”„λ‘¬ν”„νŠΈ ꡬ성 μ™„λ£Œ - 길이: {len(formatted_prompt) if formatted_prompt else 0}")
495
  if debug_log_prompt:
496
+ print(f"πŸ” [DEBUG] μ΅œμ’… ν”„λ‘¬ν”„νŠΈ: {formatted_prompt}")
497
 
498
  # --- 3. ν† ν¬λ‚˜μ΄μ§• ---
499
  print(f"πŸ” [DEBUG] ν† ν¬λ‚˜μ΄μ§• μ‹œμž‘")
 
891
 
892
  # πŸ”’ μ•ˆμ „ κ°€λ“œ: image_token_thwκ°€ 비정상일 λ•Œ -1 토큰이 μƒμ„±λ˜μ§€ μ•Šλ„λ‘ λ°©μ§€
893
  try:
894
+ if 'image_token_thw' in processed_image_metas:
895
+ it = processed_image_metas['image_token_thw']
896
+ if isinstance(it, torch.Tensor) and (it.numel() == 0 or it.shape[-1] != 3):
897
+ print(f"⚠️ [DEBUG] image_token_thw 비정상: {it.shape if hasattr(it,'shape') else type(it)} -> μ•ˆμ „ κΈ°λ³Έκ°’ 적용")
898
+ processed_image_metas['image_token_thw'] = torch.tensor([[1,1,1]] * len(all_pixel_values), dtype=torch.long).unsqueeze(0)
899
  except Exception as _safe_e:
900
  print(f"⚠️ [DEBUG] image_token_thw μ•ˆμ „ν™” μ‹€νŒ¨: {_safe_e}")
901
 
 
1062
 
1063
  import torch as _torch
1064
  with _torch.inference_mode():
1065
+ generated_ids = lora_model.generate(
1066
+ **lora_inputs,
1067
+ **gen_config
1068
+ )
1069
  else:
1070
  print(f"⚠️ [DEBUG] LoRA λͺ¨λΈμ„ κ°€μ Έμ˜¬ 수 μ—†μŒ, κΈ°λ³Έ λͺ¨λΈ μ‚¬μš©")
1071
  import torch as _torch
1072
  with _torch.inference_mode():
1073
+ generated_ids = current_model.generate(
1074
+ input_ids=input_ids,
1075
+ attention_mask=attention_mask,
1076
+ **gen_config
1077
+ )
1078
+ else:
1079
+ print(f"πŸ” [DEBUG] LoRA μ–΄λŒ‘ν„° μ—†μŒ, κΈ°λ³Έ λͺ¨λΈ μ‚¬μš©")
1080
+ import torch as _torch
1081
+ with _torch.inference_mode():
1082
  generated_ids = current_model.generate(
1083
  input_ids=input_ids,
1084
  attention_mask=attention_mask,
1085
  **gen_config
1086
  )
1087
+ except ImportError:
1088
+ print(f"πŸ” [DEBUG] LoRA 지원 μ•ˆλ¨, κΈ°λ³Έ λͺ¨λΈ μ‚¬μš©")
1089
+ import torch as _torch
1090
+ with _torch.inference_mode():
1091
  generated_ids = current_model.generate(
1092
  input_ids=input_ids,
1093
  attention_mask=attention_mask,
1094
  **gen_config
1095
  )
 
 
 
 
 
 
 
 
 
1096
 
1097
  print(f"πŸ” [DEBUG] λͺ¨λΈ 생성 μ™„λ£Œ μ‹œκ°„: {time.time()}")
1098
 
 
1119
  # μƒμ„±λœ ν…μŠ€νŠΈ λ””μ½”λ”©
1120
  full_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
1121
  if os.getenv('LILY_DEBUG_LOG_TEXT', '0') == '1':
1122
+ print(f"πŸ” [DEBUG] 전체 ν…μŠ€νŠΈ 길이: {len(full_text)}")
1123
+ print(f"πŸ” [DEBUG] 전체 생성 ν…μŠ€νŠΈ (Raw): \n---\n{full_text}\n---")
1124
+ print(f"πŸ” [DEBUG] μ‚¬μš©λœ ν”„λ‘¬ν”„νŠΈ: {formatted_prompt}")
1125
 
1126
  # ν”„λ‘œν•„λ³„ 응닡 μΆ”μΆœ (μ•ˆμ „ν•œ 방식)
1127
  if hasattr(current_profile, 'extract_response'):