rphrp1985 committed on
Commit
be1a281
·
verified ·
1 Parent(s): 05ca20e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +128 -3
app.py CHANGED
@@ -8,6 +8,7 @@ import subprocess
8
  ### monkey patch
9
 
10
  import llama_cpp._internals as internals
 
11
 
12
 
13
  # 2️⃣ Monkey patch BEFORE creating Llama()
@@ -235,6 +236,126 @@ llm_model_qwen= None
235
 
236
 
237
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
238
 
239
 
240
 
@@ -267,11 +388,15 @@ def respond(
267
  flash_attn=True,
268
  n_gpu_layers=-1,
269
  n_batch=2048, # increase
270
- n_ctx= 4098, # reduce if you don’t need 8k
271
  n_threads=16, # set to your CPU cores
272
  use_mlock=True,
273
  verbose=True,
274
- chat_format="qwen"
 
 
 
 
275
  )
276
 
277
  x=llm_model_qwen.create_chat_completion(
@@ -293,7 +418,7 @@ def respond(
293
  flash_attn=True,
294
  n_gpu_layers=-1,
295
  n_batch=2048, # increase
296
- n_ctx=4098, # reduce if you don’t need 8k
297
  n_threads=16, # set to your CPU cores
298
  use_mlock=True,
299
  verbose=True,
 
8
  ### monkey patch
9
 
10
  import llama_cpp._internals as internals
11
+ from llama_cpp.llama_chat_format import Qwen3VLChatHandler
12
 
13
 
14
  # 2️⃣ Monkey patch BEFORE creating Llama()
 
236
 
237
 
238
 
239
# Lookup table from a lower-cased file extension (leading dot included) to the
# MIME type that image_to_base64_data_uri() writes into the data URI header.
_IMAGE_MIME_TYPES = {
    # Most common formats
    '.png': 'image/png',
    '.jpg': 'image/jpeg',
    '.jpeg': 'image/jpeg',
    '.gif': 'image/gif',
    '.webp': 'image/webp',

    # Next-generation formats
    '.avif': 'image/avif',
    '.jp2': 'image/jp2',
    '.j2k': 'image/jp2',
    '.jpx': 'image/jp2',

    # Legacy / Windows formats
    '.bmp': 'image/bmp',
    '.ico': 'image/x-icon',
    '.pcx': 'image/x-pcx',
    '.tga': 'image/x-tga',
    '.icns': 'image/icns',

    # Professional / Scientific imaging
    '.tif': 'image/tiff',
    '.tiff': 'image/tiff',
    '.eps': 'application/postscript',
    # IANA-registered name is "image/vnd.ms-dds" (vendor tree "vnd.", then
    # "ms-dds"); the previous "image/vnd-ms.dds" spelling is not registered.
    '.dds': 'image/vnd.ms-dds',
    '.dib': 'image/dib',
    '.sgi': 'image/sgi',

    # Portable Map formats (PPM/PGM/PBM)
    '.pbm': 'image/x-portable-bitmap',
    '.pgm': 'image/x-portable-graymap',
    '.ppm': 'image/x-portable-pixmap',

    # Miscellaneous / Older formats
    '.xbm': 'image/x-xbitmap',
    '.mpo': 'image/mpo',
    '.msp': 'image/msp',
    '.im': 'image/x-pillow-im',
    '.qoi': 'image/qoi',
}


def image_to_base64_data_uri(
    file_path: str,
    *,
    fallback_mime: str = "application/octet-stream"
) -> str:
    """
    Convert a local image file to a base64-encoded data URI.

    The MIME type is chosen from the file extension (case-insensitive) via
    ``_IMAGE_MIME_TYPES``; the file contents are not inspected. Supports 20+
    image formats (PNG, JPEG, WebP, AVIF, BMP, ICO, TIFF, etc.).

    Args:
        file_path: Path to the image file on disk.
        fallback_mime: MIME type used when the file extension is not in
            ``_IMAGE_MIME_TYPES``. A warning is printed in that case.

    Returns:
        A data URI string of the form ``data:<mime>;base64,<payload>``
        (e.g., ``data:image/webp;base64,...``).

    Raises:
        FileNotFoundError: If ``file_path`` is not an existing regular file.
        OSError: If reading the file fails.
    """
    if not os.path.isfile(file_path):
        raise FileNotFoundError(f"Image file not found: {file_path}")

    extension = os.path.splitext(file_path)[1].lower()
    mime_type = _IMAGE_MIME_TYPES.get(extension)

    # Decide "unknown" from table membership rather than by comparing the
    # resolved type with fallback_mime: a caller may legitimately pass a
    # fallback that equals a known type (e.g. "image/png"), and that must not
    # produce a spurious warning for a recognised extension.
    if mime_type is None:
        mime_type = fallback_mime
        print(f"Warning: Unknown extension '{extension}' for '{file_path}'. "
              f"Using fallback MIME type: {fallback_mime}")

    try:
        with open(file_path, "rb") as img_file:
            encoded_data = base64.b64encode(img_file.read()).decode("utf-8")
    except OSError as e:
        # Re-raise with the path in the message while keeping the original
        # exception chained as the cause.
        raise OSError(f"Failed to read image file '{file_path}': {e}") from e

    return f"data:{mime_type};base64,{encoded_data}"
319
+
320
+
321
+
322
+ ###################### sample code ################################################
323
+ # --- Main Logic for Image Processing ---
324
+
325
+ # # 1. Create a list containing all image paths
326
+ # image_paths = [
327
+ # r'./scene.jpeg',
328
+ # r'./cat.png',
329
+ # r'./network.webp',
330
+ # # Add more image paths here if needed
331
+ # ]
332
+
333
+ # # 2. Create an empty list to store the message objects (images and text)
334
+ # images_messages = []
335
+
336
+
337
+
338
+ # # 3. Loop through the image path list, convert each image to a Data URI,
339
+ # # and add it to the message list as an image_url object.
340
+ # for path in image_paths:
341
+ # data_uri = image_to_base64_data_uri(path)
342
+ # images_messages.append({"type": "image_url", "image_url": {"url": data_uri}})
343
+
344
+ # # 4. Add the final text prompt at the end of the list
345
+ # images_messages.append({"type": "text", "text": "Describe the images."})
346
+
347
+ # # 5. Use this list to build the chat_completion request
348
+ # res = llm.create_chat_completion(
349
+ # messages=[
350
+ # {"role": "system", "content": "You are a highly accurate vision-language assistant. Provide detailed, precise, and well-structured image descriptions."},
351
+ # # The user's content is the list containing both images and text
352
+ # {"role": "user", "content": images_messages}
353
+ # ]
354
+ # )
355
+
356
+ # # Print the assistant's response
357
+ # print(res["choices"][0]["message"]["content"])
358
+
359
 
360
 
361
 
 
388
  flash_attn=True,
389
  n_gpu_layers=-1,
390
  n_batch=2048, # increase
391
+ n_ctx= 8196, # reduce if you don’t need 8k
392
  n_threads=16, # set to your CPU cores
393
  use_mlock=True,
394
  verbose=True,
395
+ chat_handler=Qwen3VLChatHandler(
396
+ clip_model_path=MMPROJ_PATH,
397
+ force_reasoning=True,
398
+ image_min_tokens=1024, # Note: Qwen-VL models require at minimum 1024 image tokens to function correctly on bbox grounding tasks
399
+ ),
400
  )
401
 
402
  x=llm_model_qwen.create_chat_completion(
 
418
  flash_attn=True,
419
  n_gpu_layers=-1,
420
  n_batch=2048, # increase
421
+ n_ctx=8196, # reduce if you don’t need 8k
422
  n_threads=16, # set to your CPU cores
423
  use_mlock=True,
424
  verbose=True,