| |
|
|
| import base64 |
| import os |
| from typing import Any, Optional |
|
|
| from openai import OpenAI |
|
|
|
|
def get_image_base64_and_mime(image_path):
    """Read an image file and return its base64 contents plus MIME type.

    Args:
        image_path: Path to the image file on disk.

    Returns:
        tuple: ``(base64_str, mime_type)`` on success, or ``(None, None)``
        if the file cannot be read. Unrecognized extensions fall back to
        ``"image/jpeg"``.
    """
    mime_types = {
        ".jpg": "image/jpeg",
        ".jpeg": "image/jpeg",
        ".png": "image/png",
        ".gif": "image/gif",
        ".webp": "image/webp",
        ".bmp": "image/bmp",
    }
    ext = os.path.splitext(image_path)[1].lower()
    mime_type = mime_types.get(ext, "image/jpeg")

    try:
        # Keep the try body minimal: only the file read can fail here, and
        # filesystem failures are OSError — no need for a blanket Exception.
        with open(image_path, "rb") as image_file:
            raw = image_file.read()
    except OSError as e:
        print(f"Error converting image to base64: {e}")
        return None, None

    return base64.b64encode(raw).decode("utf-8"), mime_type
|
|
|
|
def send_generate_request(
    messages,
    server_url=None,
    model="meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
    api_key=None,
    max_tokens=4096,
):
    """
    Send a chat-completion request to an OpenAI-compatible API endpoint.

    Content parts of the form ``{"type": "image", "image": <path>}`` inside
    user messages are converted to base64 ``image_url`` parts before sending.

    Args:
        messages (list): Message dicts with ``role`` and ``content`` (OpenAI
            chat format). ``content`` may be a plain string or a list of
            content parts.
        server_url (str, optional): Base URL of the server,
            e.g. ``"http://127.0.0.1:8000"``. ``None`` uses the client default.
        model (str): Model to use for generation.
        api_key (str, optional): API key forwarded to the OpenAI client.
        max_tokens (int): Maximum number of tokens to generate (default 4096).

    Returns:
        str | None: The generated response text, or ``None`` on failure.
    """
    processed_messages = []
    for message in messages:
        processed_message = message.copy()
        # Only rewrite user messages whose content is a LIST of parts.
        # A bare `"content" in message` check would iterate a plain string
        # character by character and silently mangle it into a char list.
        if message["role"] == "user" and isinstance(message.get("content"), list):
            processed_content = []
            for part in message["content"]:
                if isinstance(part, dict) and part.get("type") == "image":
                    image_path = part["image"]
                    print("image_path", image_path)
                    # Percent-encode "?" so it is not read as a query string.
                    new_image_path = image_path.replace("?", "%3F")

                    # The helper reports read errors itself and returns
                    # (None, None); skip the part rather than send a bad URL.
                    base64_image, mime_type = get_image_base64_and_mime(
                        new_image_path
                    )
                    if base64_image is None:
                        print(
                            f"Warning: Could not convert image to base64: {new_image_path}"
                        )
                        continue

                    processed_content.append(
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:{mime_type};base64,{base64_image}",
                                "detail": "high",
                            },
                        }
                    )
                else:
                    processed_content.append(part)
            processed_message["content"] = processed_content
        processed_messages.append(processed_message)

    client = OpenAI(api_key=api_key, base_url=server_url)

    try:
        print(f"🔍 Calling model {model}...")
        response = client.chat.completions.create(
            model=model,
            messages=processed_messages,
            max_completion_tokens=max_tokens,
            n=1,
        )
        if response.choices and len(response.choices) > 0:
            return response.choices[0].message.content
        else:
            print(f"Unexpected response format: {response}")
            return None
    except Exception as e:
        # Boundary handler: report and degrade to None rather than crash.
        print(f"Request failed: {e}")
        return None
|
|
|
|
def send_direct_request(
    llm: Any,
    messages: list[dict[str, Any]],
    sampling_params: Any,
) -> Optional[str]:
    """
    Run inference on a vLLM model instance directly without using a server.

    Content parts of the form ``{"type": "image", "image": <path>}`` inside
    user messages are converted to base64 ``image_url`` parts first.

    Args:
        llm: Initialized vLLM LLM instance (passed from external initialization).
        messages: Message dicts with ``role`` and ``content`` (OpenAI format).
            ``content`` may be a plain string or a list of content parts.
        sampling_params: vLLM SamplingParams instance (initialized externally).

    Returns:
        str | None: Generated response text, or ``None`` if inference fails.
    """
    try:
        processed_messages = []
        for message in messages:
            processed_message = message.copy()
            # Only rewrite user messages whose content is a LIST of parts;
            # iterating a plain-string content would split it into characters.
            if message["role"] == "user" and isinstance(
                message.get("content"), list
            ):
                processed_content = []
                for part in message["content"]:
                    if isinstance(part, dict) and part.get("type") == "image":
                        image_path = part["image"]
                        # Percent-encode "?" so it is not read as a query string.
                        new_image_path = image_path.replace("?", "%3F")

                        # Helper returns (None, None) on read failure; skip
                        # the part instead of emitting a broken data URL.
                        base64_image, mime_type = get_image_base64_and_mime(
                            new_image_path
                        )
                        if base64_image is None:
                            print(
                                f"Warning: Could not convert image: {new_image_path}"
                            )
                            continue

                        processed_content.append(
                            {
                                "type": "image_url",
                                "image_url": {
                                    "url": f"data:{mime_type};base64,{base64_image}"
                                },
                            }
                        )
                    else:
                        processed_content.append(part)
                processed_message["content"] = processed_content
            processed_messages.append(processed_message)

        print("🔍 Running direct inference with vLLM...")

        outputs = llm.chat(
            messages=processed_messages,
            sampling_params=sampling_params,
        )

        if outputs and len(outputs) > 0:
            # vLLM returns one RequestOutput per prompt; take the first
            # candidate of the first (only) request.
            return outputs[0].outputs[0].text
        else:
            print(f"Unexpected output format: {outputs}")
            return None
    except Exception as e:
        # Boundary handler: report and degrade to None rather than crash.
        print(f"Direct inference failed: {e}")
        return None
|
|