| """ | |
| Gradio interface for the LLaVA model. | |
| """ | |
| import gradio as gr | |
| from PIL import Image | |
| import os | |
| import tempfile | |
| import torch | |
| from fastapi import FastAPI, HTTPException | |
| from fastapi.middleware.cors import CORSMiddleware | |
| import traceback | |
| import sys | |
from ..configs.settings import (
    GRADIO_THEME,
    GRADIO_TITLE,
    GRADIO_DESCRIPTION,
    DEFAULT_MAX_NEW_TOKENS,
    DEFAULT_TEMPERATURE,
    DEFAULT_TOP_P,
    API_HOST,
    API_PORT,
    API_WORKERS,
    API_RELOAD
)
from ..models.llava_model import LLaVAModel
from ..utils.logging import setup_logging, get_logger

# Set up logging
setup_logging()
logger = get_logger(__name__)

# Initialize FastAPI app
app = FastAPI(title="LLaVA Web Interface")

# Configure CORS
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
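# NOTE: wildcard CORS (all origins, methods, and headers) keeps the demo easy
# to call from anywhere; tighten allow_origins before any production use.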

# Global model handle; populated by initialize_model()
model = None


def initialize_model() -> bool:
    """Load the LLaVA model; return True on success."""
    global model
    try:
        logger.info("Initializing LLaVA model...")
        # Use smaller model variants and memory optimizations for CPU-only Spaces
        model = LLaVAModel(
            vision_model_path="openai/clip-vit-base-patch32",  # smaller vision model
            language_model_path="TinyLlama/TinyLlama-1.1B-Chat-v1.0",  # smaller language model
            device="cpu",  # force CPU for Hugging Face Spaces
            projection_hidden_dim=2048  # reduce projection layer size
        )
        # Release any cached GPU memory (no-op on CPU-only runs)
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        if hasattr(model, 'language_model'):
            # Disable the KV cache to trade speed for a smaller memory footprint
            model.language_model.config.use_cache = False
        logger.info(f"Model initialized on {model.device}")
        return True
    except Exception as e:
        error_msg = f"Error initializing model: {e}\n{traceback.format_exc()}"
        logger.error(error_msg)
        print(error_msg, file=sys.stderr)
        return False


def process_image(
    image: Image.Image,
    prompt: str,
    max_new_tokens: int = DEFAULT_MAX_NEW_TOKENS,
    temperature: float = DEFAULT_TEMPERATURE,
    top_p: float = DEFAULT_TOP_P
) -> str:
    """
    Process an image with the LLaVA model.

    Args:
        image: Input image
        prompt: Text prompt
        max_new_tokens: Maximum number of tokens to generate
        temperature: Sampling temperature
        top_p: Top-p sampling parameter

    Returns:
        str: Model response
    """
    if model is None:
        error_msg = "Error: Model not initialized"
        logger.error(error_msg)
        return error_msg
    if image is None:
        error_msg = "Error: No image provided"
        logger.error(error_msg)
        return error_msg
    if not prompt or not prompt.strip():
        error_msg = "Error: No prompt provided"
        logger.error(error_msg)
        return error_msg

    try:
        logger.info(f"Processing image with prompt: {prompt[:100]}...")
        # Clear cached GPU memory before processing (no-op on CPU)
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        # Generate the response; inference_mode is more memory-efficient than no_grad
        with torch.inference_mode():
            response = model(
                image=image,
                prompt=prompt,
                max_new_tokens=max_new_tokens,
                temperature=temperature,
                top_p=top_p
            )
        logger.info("Successfully generated response")
        return response
    except Exception as e:
        error_msg = f"Error processing image: {e}\n{traceback.format_exc()}"
        logger.error(error_msg)
        print(error_msg, file=sys.stderr)
        return f"Error: {e}"
    finally:
        # Clear cached GPU memory after processing (no-op on CPU)
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
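# Example (sketch): process_image can be exercised directly once the model is
# loaded; "cat.png" is a placeholder path, not a file shipped with this repo.
#
#     if initialize_model():
#         img = Image.open("cat.png")
#         print(process_image(img, "Describe this scene in detail"))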


def create_interface() -> gr.Blocks:
    """Create and return the Gradio interface."""
    with gr.Blocks(theme=GRADIO_THEME) as interface:
        gr.Markdown(f"""# {GRADIO_TITLE}

{GRADIO_DESCRIPTION}

## Example Prompts
Try these prompts to get started:
- "What can you see in this image?"
- "Describe this scene in detail"
- "What emotions does this image convey?"
- "What's happening in this picture?"
- "Can you identify any objects or people in this image?"

## Usage Instructions
1. Upload an image using the image uploader
2. Enter your prompt in the text box
3. (Optional) Adjust the generation parameters
4. Click "Generate Response" to get LLaVA's analysis
""")

        with gr.Row():
            with gr.Column():
                # Input components
                image_input = gr.Image(type="pil", label="Upload Image")
                prompt_input = gr.Textbox(
                    label="Prompt",
                    placeholder="What can you see in this image?",
                    lines=3
                )
                with gr.Accordion("Generation Parameters", open=False):
                    max_tokens = gr.Slider(
                        minimum=64,
                        maximum=2048,
                        value=DEFAULT_MAX_NEW_TOKENS,
                        step=64,
                        label="Max New Tokens"
                    )
                    temperature = gr.Slider(
                        minimum=0.1,
                        maximum=1.0,
                        value=DEFAULT_TEMPERATURE,
                        step=0.1,
                        label="Temperature"
                    )
                    top_p = gr.Slider(
                        minimum=0.1,
                        maximum=1.0,
                        value=DEFAULT_TOP_P,
                        step=0.1,
                        label="Top P"
                    )
                generate_btn = gr.Button("Generate Response", variant="primary")
            with gr.Column():
                # Output component
                output = gr.Textbox(
                    label="Response",
                    lines=10,
                    show_copy_button=True
                )

        # Wire the button to the inference function; api_name also exposes
        # this handler as a named API endpoint.
        generate_btn.click(
            fn=process_image,
            inputs=[
                image_input,
                prompt_input,
                max_tokens,
                temperature,
                top_p
            ],
            outputs=output,
            api_name="process_image"
        )
    return interface


# Create the Gradio app and mount it on the FastAPI application at the root path
demo = create_interface()
app = gr.mount_gradio_app(app, demo, path="/")
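# Example (sketch): with the server running, the named endpoint can be called
# from Python via gradio_client. The URL and image path are assumptions for a
# local run; newer gradio_client releases may need handle_file() for the image.
#
#     from gradio_client import Client
#     client = Client(f"http://{API_HOST}:{API_PORT}/")
#     result = client.predict(
#         "cat.png",                          # image (local file path)
#         "What can you see in this image?",  # prompt
#         256,                                # max_new_tokens
#         0.7,                                # temperature
#         0.9,                                # top_p
#         api_name="/process_image"
#     )
#     print(result)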


def main():
    """Initialize the model and run the FastAPI application."""
    import uvicorn

    # Load the model before serving any requests
    if not initialize_model():
        logger.error("Failed to initialize model. Exiting...")
        sys.exit(1)

    # Start the server. Note that uvicorn only honors `workers` and `reload`
    # when given an import string ("package.module:app") rather than an app
    # object, so API_WORKERS/API_RELOAD should be 1/False in this setup.
    uvicorn.run(
        app,
        host=API_HOST,
        port=API_PORT,
        workers=API_WORKERS,
        reload=API_RELOAD,
        log_level="info"
    )


if __name__ == "__main__":
    main()