Spaces:

PRC142004
/

Pest_Outbreak_Prediction

Sleeping

App Files Files Community

Pest_Outbreak_Prediction / image_summarizer.py

krushimitravit

Update image_summarizer.py

3f4fb13 verified 3 months ago

raw

history blame contribute delete

4.83 kB

	import base64
	import os
	from openai import OpenAI

	# Model configurations
	PRIMARY_MODEL = "meta/llama-3.2-90b-vision-instruct"  # tried first, receives the image
	FALLBACK_MODEL = "meta/llama-3.1-70b-instruct"  # text-only model used when the primary call fails
	IMAGE_PATH = "image.png"  # file that summarize_image() reads

	# OpenAI-compatible client pointed at the NVIDIA API endpoint.
	# NOTE(review): the key is read from GEMINI_API_KEY_1 even though the
	# endpoint is NVIDIA's -- confirm this is the intended environment variable.
	client = OpenAI(
	    api_key=os.environ.get("GEMINI_API_KEY_1"),
	    base_url="https://integrate.api.nvidia.com/v1",
	)


	def encode_image(image_path):
	    """Return the contents of the file at ``image_path`` as base64 text.

	    Args:
	        image_path: Path of the file to read; it is opened in binary mode.

	    Returns:
	        str: Base64 encoding of the file's bytes, decoded as UTF-8 text.
	    """
	    with open(image_path, "rb") as handle:
	        raw_bytes = handle.read()
	    encoded = base64.b64encode(raw_bytes)
	    return encoded.decode('utf-8')


	def summarize_with_vision_model(base64_image, mime_type="image/png"):
	    """
	    Attempt to summarize an image using the primary vision model.

	    The reply is requested as a stream: every text fragment is echoed to
	    stdout as it arrives and the fragments are joined into the return value.

	    Args:
	        base64_image: Base64 encoded image string
	        mime_type: MIME type written into the data URL sent to the model.
	            Defaults to "image/png", the value that used to be hard-coded.

	    Returns:
	        str: Summary text or None if failed
	    """
	    try:
	        print(f"🔍 Attempting with primary vision model: {PRIMARY_MODEL}...")

	        completion = client.chat.completions.create(
	            model=PRIMARY_MODEL,
	            messages=[
	                {
	                    "role": "user",
	                    "content": [
	                        {"type": "text", "text": "Please summarize what you see in this image."},
	                        {
	                            "type": "image_url",
	                            "image_url": {
	                                "url": f"data:{mime_type};base64,{base64_image}"
	                            },
	                        },
	                    ],
	                }
	            ],
	            max_tokens=500,
	            temperature=0.2,
	            stream=True,
	        )

	        print("\n✅ Image Summary (Vision Model):\n" + "-" * 50)
	        fragments = []
	        for chunk in completion:
	            # A streamed chunk may carry an empty `choices` list; indexing it
	            # would raise IndexError, which the handler below would turn into
	            # a failure and discard the text already received.
	            if not chunk.choices:
	                continue
	            content = chunk.choices[0].delta.content
	            if content is not None:
	                # Flush per fragment so the summary appears as it streams in.
	                print(content, end="", flush=True)
	                fragments.append(content)
	        print("\n" + "-" * 50)

	        return "".join(fragments)

	    except Exception as e:
	        # Best-effort boundary: any failure is reported on stdout and
	        # signalled to the caller as None instead of propagating.
	        print(f"\n⚠️ Vision model failed: {e}")
	        return None


	def summarize_with_text_fallback():
	    """
	    Fallback method using text-only LLM.

	    No image is sent: the model is asked for a generic answer about image
	    analysis. The reply is streamed, echoed to stdout fragment by fragment,
	    and joined into the return value.

	    Returns:
	        str: Fallback response, or None if the request failed
	    """
	    try:
	        print(f"\n🔄 Falling back to text model: {FALLBACK_MODEL}...")

	        # Create a prompt that acknowledges the limitation.
	        # The continuation lines of this literal are part of the prompt text,
	        # so their leading whitespace is kept exactly as it was.
	        prompt = """I attempted to analyze an image but the vision model is unavailable.
	Please provide a helpful response about what types of information can typically be extracted from images,
	and suggest alternative approaches for image analysis."""

	        completion = client.chat.completions.create(
	            model=FALLBACK_MODEL,
	            messages=[
	                {
	                    "role": "user",
	                    "content": prompt,
	                }
	            ],
	            max_tokens=500,
	            temperature=0.2,
	            stream=True,
	        )

	        print("\n💡 Fallback Response (Text Model):\n" + "-" * 50)
	        fragments = []
	        for chunk in completion:
	            # A streamed chunk may carry an empty `choices` list; indexing it
	            # would raise IndexError, which the handler below would turn into
	            # a failure and discard the text already received.
	            if not chunk.choices:
	                continue
	            content = chunk.choices[0].delta.content
	            if content is not None:
	                # Flush per fragment so the response appears as it streams in.
	                print(content, end="", flush=True)
	                fragments.append(content)
	        print("\n" + "-" * 50)

	        return "".join(fragments)

	    except Exception as e:
	        # Best-effort boundary: any failure is reported on stdout and
	        # signalled to the caller as None instead of propagating.
	        print(f"\n❌ Fallback model also failed: {e}")
	        return None


	def summarize_image(image_path=None):
	    """
	    Main function to summarize an image with fallback support.

	    Attempts to use the vision model first and falls back to the text model
	    if that returns None. Progress, results and errors are printed to stdout.

	    Args:
	        image_path: Path of the image to process. When omitted, the
	            module-level IMAGE_PATH is used, which was the only option before
	            this parameter existed.

	    Returns:
	        None. Nothing is returned on any path; all output is printed.
	    """
	    # Resolve the default at call time so a reassigned IMAGE_PATH is honoured.
	    if image_path is None:
	        image_path = IMAGE_PATH

	    # Check if image exists; list the working directory to help diagnose
	    # a wrong relative path.
	    if not os.path.exists(image_path):
	        print(f"❌ Error: {image_path} not found.")
	        print(f"📁 Current directory: {os.getcwd()}")
	        print(f"📋 Files in current directory: {os.listdir('.')}")
	        return

	    print(f"📸 Processing {image_path}...")
	    print(f"📏 File size: {os.path.getsize(image_path)} bytes\n")

	    # Encode the image
	    try:
	        base64_image = encode_image(image_path)
	    except Exception as e:
	        print(f"❌ Error encoding image: {e}")
	        return

	    # Try vision model first
	    result = summarize_with_vision_model(base64_image)

	    # If vision model failed, use fallback
	    used_fallback = result is None
	    if used_fallback:
	        print("\n🔄 Primary model failed, attempting fallback...")
	        result = summarize_with_text_fallback()

	    # Final status
	    if result is None:
	        print("\n❌ All methods failed. Please check:")
	        print(" 1. API key validity")
	        print(" 2. Network connection")
	        print(" 3. NVIDIA API service status")
	    elif used_fallback:
	        # The text fallback never sees the image, so reporting plain success
	        # here would be misleading.
	        print("\n⚠️ Vision model unavailable: the image was not analyzed; "
	              "only a generic text-model response was produced.")
	    else:
	        print("\n✅ Image processing completed successfully!")


	# Run the summarizer only when this file is executed as a script,
	# not when it is imported as a module.
	if __name__ == "__main__":
	    summarize_image()