EveryonesGPT_Vision_Instruct

Sleeping

App Files Files Community

HayatoHongoEveryonesAI committed on Jan 14

Commit

aa68cd8

1 Parent(s): 5e541e3

yield token

Browse files

Files changed (1) hide show

app.py +18 -40

app.py CHANGED Viewed

@@ -50,67 +50,45 @@ def parse_message(message: dict):
 # GPU inference (single-turn, VLM only)
 # =====================================================
 @spaces.GPU
-def chat_fn(
-    message,
-    history,        # unused (single-turn)
-    temperature,
-    top_p,
-    top_k,
-):
-    print("[DEBUG] chat_fn called")
-    print("[DEBUG] temperature:", temperature, "top_p:", top_p, "top_k:", top_k)
     text, image = parse_message(message)
     if image is None:
-        print("[DEBUG] image is None -> returning error message")
-        return "Image input is required."
     device = "cuda"
-    print("[DEBUG] moving model to GPU")
     model_gpu = model.to(device)
-    # AFTER（ログ以外で唯一必要な修正）
     if isinstance(image, str):
         image = Image.open(image)
-    print("[DEBUG] preprocessing image")
     image_tensor = image_processor(
         images=image.convert("RGB"),
         return_tensors="pt"
     )["pixel_values"].to(device)
-    print("[DEBUG] image_tensor shape:", image_tensor.shape)
     prompt = (
         "<user>\n"
         f"{text}\n"
         "<assistant>\n"
     )
-    print("[DEBUG] prompt:")
-    print(prompt)
-    def stream():
-        print("[DEBUG] stream generator started")
-        for chunk in vlm_infer_stream(
-            model=model_gpu,
-            image_tensor=image_tensor,
-            prompt=prompt,
-            max_new_tokens=256,
-            temperature=temperature,
-            top_p=top_p if top_p > 0 else None,
-            top_k=top_k if top_k > 0 else None,
-        ):
-            print("[DEBUG] yield chunk:", repr(chunk))
-            yield chunk
-        print("[DEBUG] inference finished, cleaning up GPU")
-        model_gpu.to("cpu")
-        torch.cuda.empty_cache()
-        print("[DEBUG] GPU cleanup done")
-    return stream()
 # =====================================================

 # GPU inference (single-turn, VLM only)
 # =====================================================
 @spaces.GPU
+def chat_fn(message, history, temperature, top_p, top_k):
     text, image = parse_message(message)
     if image is None:
+        yield "Image input is required."
+        return
     device = "cuda"
     model_gpu = model.to(device)
     if isinstance(image, str):
+        from PIL import Image
         image = Image.open(image)
     image_tensor = image_processor(
         images=image.convert("RGB"),
         return_tensors="pt"
     )["pixel_values"].to(device)
     prompt = (
         "<user>\n"
         f"{text}\n"
         "<assistant>\n"
     )
+    for chunk in vlm_infer_stream(
+        model=model_gpu,
+        image_tensor=image_tensor,
+        prompt=prompt,
+        max_new_tokens=256,
+        temperature=temperature,
+        top_p=top_p if top_p > 0 else None,
+        top_k=top_k if top_k > 0 else None,
+    ):
+        yield chunk
+    model_gpu.to("cpu")
+    torch.cuda.empty_cache()
 # =====================================================