HayatoHongoEveryonesAI commited on
Commit
aa68cd8
·
1 Parent(s): 5e541e3

yield token

Browse files
Files changed (1) hide show
  1. app.py +18 -40
app.py CHANGED
@@ -50,67 +50,45 @@ def parse_message(message: dict):
50
  # GPU inference (single-turn, VLM only)
51
  # =====================================================
52
@spaces.GPU
def chat_fn(message, history, temperature, top_p, top_k):
    """Run single-turn VLM inference on GPU and stream the generated text.

    Args:
        message: Gradio multimodal message; parse_message() splits it into
            (text, image).
        history: Chat history — unused, inference is single-turn.
        temperature: Sampling temperature forwarded to vlm_infer_stream.
        top_p: Nucleus-sampling threshold; values <= 0 disable it (None).
        top_k: Top-k cutoff; values <= 0 disable it (None).

    Yields:
        str: incremental chunks of the generated response.
    """
    text, image = parse_message(message)

    # An image is mandatory for the VLM pipeline.
    if image is None:
        yield "Image input is required."
        return

    device = "cuda"
    model_gpu = model.to(device)

    # Gradio may deliver the image as a file path rather than a PIL object.
    if isinstance(image, str):
        image = Image.open(image)

    image_tensor = image_processor(
        images=image.convert("RGB"),
        return_tensors="pt",
    )["pixel_values"].to(device)

    prompt = (
        "<user>\n"
        f"{text}\n"
        "<assistant>\n"
    )

    try:
        # BUG FIX: the original defined an inner stream() generator, returned
        # it, but ran the GPU cleanup eagerly — so the model was moved back to
        # CPU before the caller consumed a single token. Yielding directly
        # makes chat_fn itself the generator and defers cleanup correctly.
        for chunk in vlm_infer_stream(
            model=model_gpu,
            image_tensor=image_tensor,
            prompt=prompt,
            max_new_tokens=256,
            temperature=temperature,
            top_p=top_p if top_p > 0 else None,
            top_k=top_k if top_k > 0 else None,
        ):
            yield chunk
    finally:
        # Release GPU memory even if generation fails or the client
        # disconnects mid-stream (generator gets closed -> finally runs).
        model_gpu.to("cpu")
        torch.cuda.empty_cache()
114
 
115
 
116
  # =====================================================
 
50
  # GPU inference (single-turn, VLM only)
51
  # =====================================================
52
@spaces.GPU
def chat_fn(message, history, temperature, top_p, top_k):
    """Run single-turn VLM inference on GPU and stream the generated text.

    Args:
        message: Gradio multimodal message; parse_message() splits it into
            (text, image).
        history: Chat history — unused, inference is single-turn.
        temperature: Sampling temperature forwarded to vlm_infer_stream.
        top_p: Nucleus-sampling threshold; values <= 0 disable it (None).
        top_k: Top-k cutoff; values <= 0 disable it (None).

    Yields:
        str: incremental chunks of the generated response.
    """
    text, image = parse_message(message)

    # An image is mandatory for the VLM pipeline.
    if image is None:
        yield "Image input is required."
        return

    device = "cuda"
    model_gpu = model.to(device)

    # Gradio may deliver the image as a file path rather than a PIL object.
    if isinstance(image, str):
        from PIL import Image
        image = Image.open(image)

    image_tensor = image_processor(
        images=image.convert("RGB"),
        return_tensors="pt",
    )["pixel_values"].to(device)

    prompt = (
        "<user>\n"
        f"{text}\n"
        "<assistant>\n"
    )

    try:
        for chunk in vlm_infer_stream(
            model=model_gpu,
            image_tensor=image_tensor,
            prompt=prompt,
            max_new_tokens=256,
            temperature=temperature,
            top_p=top_p if top_p > 0 else None,
            top_k=top_k if top_k > 0 else None,
        ):
            yield chunk
    finally:
        # ROBUSTNESS FIX: without try/finally, an exception inside
        # vlm_infer_stream (or a client disconnect that closes the generator)
        # skips the cleanup and leaks the model on the GPU.
        model_gpu.to("cpu")
        torch.cuda.empty_cache()
 
 
 
 
 
 
 
 
 
92
 
93
 
94
  # =====================================================