#!/usr/bin/env python3
"""
Model processing utilities for local and remote AI models
"""
import requests
import base64
from io import BytesIO
from PIL import Image
from typing import Dict, Any, Optional
def image_to_base64(image: Image.Image) -> str:
    """Encode a PIL image as a base64 string of its PNG bytes."""
    with BytesIO() as png_buffer:
        image.save(png_buffer, format="PNG")
        encoded = base64.b64encode(png_buffer.getvalue())
    return encoded.decode()
def process_image_locally(image: Image.Image, prompt: str, model_name: str, local_manager) -> Dict[str, Any]:
    """Run inference on a single image with a locally hosted model.

    Dispatches on *model_name*: the person-on-track detector gets its own
    result key; every other model produces a caption. Any failure is caught
    and reported as an ``{"error": ...}`` dict rather than raised.
    """
    try:
        if model_name != "Person on Track Detector":
            caption = local_manager.generate_caption(model_name, image, prompt)
            return {"generated_text": caption}
        # Dedicated detector path: returns detection data, not a caption.
        detection = local_manager.person_on_track_detector.detect_person_on_track(image)
        return {"person_on_track_detection": detection}
    except Exception as exc:
        return {"error": f"Local processing failed: {str(exc)}"}
def query_huggingface_api(image: Image.Image, prompt: str, model_name: str, api_token: str) -> Dict[str, Any]:
    """Send an image (plus optional text prompt) to the Hugging Face Inference API.

    Args:
        image: The frame to analyze.
        prompt: Text prompt; only used for non-BLIP vision-language models.
        model_name: Hugging Face model id, appended to the inference URL.
        api_token: Bearer token for the Authorization header.

    Returns:
        The decoded JSON response on success, or an ``{"error": ...}`` dict
        describing the failure.
    """
    API_URL = f"https://api-inference.huggingface.co/models/{model_name}"
    headers = {"Authorization": f"Bearer {api_token}"}

    if "blip" in model_name.lower():
        # BLIP captioning endpoints take the raw image bytes as the request
        # body (HF Inference API convention), not a multipart file upload.
        buffer = BytesIO()
        image.save(buffer, format="PNG")
        response = requests.post(
            API_URL,
            headers=headers,
            data=buffer.getvalue(),
            timeout=60,  # never hang indefinitely on a stalled connection
        )
    else:
        # Other vision-language models accept a JSON payload with the image
        # base64-encoded alongside the text prompt. Encode lazily here so the
        # BLIP branch doesn't pay for an unused base64 conversion.
        payload = {
            "inputs": {
                "image": image_to_base64(image),
                "text": prompt,
            }
        }
        response = requests.post(API_URL, headers=headers, json=payload, timeout=60)

    if response.status_code != 200:
        return {"error": f"API request failed: {response.status_code} - {response.text}"}
    try:
        return response.json()
    except ValueError:
        # 200 status with a non-JSON body (e.g. an HTML page from a proxy).
        return {"error": "API returned a non-JSON response"}
def process_frame(frame_data: Dict, config: Dict[str, Any], local_manager=None) -> Dict[str, Any]:
    """Process a single frame using the configured model backend.

    Args:
        frame_data: Dict holding the frame; only the ``'frame'`` key
            (a PIL image) is read here.
        config: Must contain ``'model_type'`` and ``'selected_model'``;
            ``'prompt'`` and ``'api_token'`` are optional.
        local_manager: Local model manager, required for the local path.

    Returns:
        The result dict from the chosen backend (which may itself be an
        ``{"error": ...}`` dict on failure).
    """
    model_type = config["model_type"]
    selected_model = config["selected_model"]
    prompt = config.get("prompt", "")
    api_token = config.get("api_token")

    # NOTE(review): when "Local Models" is selected but no local_manager was
    # supplied, this silently falls back to the remote API — confirm intended.
    if model_type == "Local Models" and local_manager:
        return process_image_locally(
            frame_data['frame'],
            prompt,
            selected_model,
            local_manager,
        )
    return query_huggingface_api(
        frame_data['frame'],
        prompt,
        selected_model,
        api_token,
    )