Spaces:

Navada25
/

ObjectDetection_App

Sleeping

App Files Files Community

ObjectDetection_App / backend /chat_agent.py

Navada25

Deploy NAVADA 2.0 Lite - Optimized for HF Spaces (no face recognition)

75f48fa verified 3 months ago

raw

history blame contribute delete

7.52 kB

	"""
	AI Chat Agent with conversation memory and text-to-speech capabilities
	"""
	import os
	from openai import OpenAI # type: ignore
	import tempfile
	from datetime import datetime
	import json

	# Build the shared OpenAI client from the OPENAI_API_KEY environment variable.
	# A missing or empty key aborts the import with ValueError.
	api_key = os.environ.get("OPENAI_API_KEY")
	if api_key:
	    client = OpenAI(api_key=api_key)
	else:
	    raise ValueError("OPENAI_API_KEY environment variable is required")

	class ChatAgent:
	    """Chat assistant that keeps conversation memory and can speak its replies.

	    The conversation is stored as a list of ``{"role": ..., "content": ...}``
	    dicts in ``conversation_history``. The first entry is the system prompt;
	    image analysis results are injected as additional system messages whose
	    content starts with ``IMAGE_CONTEXT_PREFIX``.
	    """

	    # Trimming kicks in once the history holds more than this many messages.
	    MAX_HISTORY_MESSAGES = 20
	    # Number of most recent user/assistant messages kept when trimming.
	    RECENT_MESSAGES_KEPT = 15
	    # Prefix marking a system message as an image-context update.
	    IMAGE_CONTEXT_PREFIX = "Image context update: "

	    def __init__(self):
	        """Initialize the chat agent with conversation memory"""
	        self.conversation_history = []
	        self.system_prompt = """You are NAVADA Assistant, an intelligent AI companion for computer vision analysis.
	You help users understand what's in their images, answer questions about detected objects,
	and provide insights about visual content. You're friendly, helpful, and knowledgeable about
	computer vision, image analysis, and can discuss colors, positions, sizes, and relationships
	between objects in images. You have access to detailed detection results including object colors,
	positions, sizes, and confidence scores."""

	        # Add system message to history
	        self.conversation_history.append({
	            "role": "system",
	            "content": self.system_prompt
	        })

	        # Store context about current image analysis
	        self.current_image_context = None

	    def update_image_context(self, detected_objects, detailed_attributes=None):
	        """Update the agent's knowledge about the current image.

	        Args:
	            detected_objects: Iterable of detected object labels; empty or
	                None is reported as "no objects detected".
	            detailed_attributes: Optional iterable of dicts with the keys
	                'label', 'colors', 'size', 'position' and 'confidence'.
	                Any missing key (or an empty 'colors' list) is rendered
	                as 'unknown'.

	        The description is stored in ``current_image_context`` and appended
	        to the conversation as a system message.
	        """
	        if detected_objects:
	            objects_text = ', '.join(str(obj) for obj in detected_objects)
	        else:
	            objects_text = 'no objects detected'
	        parts = [f"Current image analysis shows: {objects_text}."]

	        if detailed_attributes:
	            parts.append("\n\nDetailed analysis:")
	            for attr in detailed_attributes:
	                # Fall back to 'unknown' for a missing *or empty* colour list
	                colors = attr.get('colors') or ['unknown']
	                if isinstance(colors, str):
	                    # A bare string would otherwise be sliced per character
	                    colors = [colors]
	                # Only the first two colours are mentioned
	                color_text = " and ".join(str(c) for c in colors[:2])
	                parts.append(
	                    f"\n- {attr.get('label', 'unknown')}: {color_text} color(s), "
	                    f"{attr.get('size', 'unknown')} size, "
	                    f"located at {attr.get('position', 'unknown')} "
	                    f"(confidence: {attr.get('confidence', 'unknown')})"
	                )

	        context = "".join(parts)
	        self.current_image_context = context

	        # Add context to conversation as a system message
	        self.conversation_history.append({
	            "role": "system",
	            "content": f"{self.IMAGE_CONTEXT_PREFIX}{context}"
	        })

	    def _is_image_context(self, message):
	        """Return True if message is a system message carrying an image-context update."""
	        if message.get("role") != "system":
	            return False
	        content = message.get("content") or ""
	        return isinstance(content, str) and content.startswith(self.IMAGE_CONTEXT_PREFIX)

	    def _trim_history(self):
	        """Bound the history once it exceeds MAX_HISTORY_MESSAGES.

	        Keeps the non-context system messages (normally just the system
	        prompt), only the most recent image-context update, and the last
	        RECENT_MESSAGES_KEPT user/assistant messages. Stale image contexts
	        are dropped and no message is duplicated.
	        """
	        if len(self.conversation_history) <= self.MAX_HISTORY_MESSAGES:
	            return

	        base_messages = []
	        latest_context = None
	        dialogue = []
	        for message in self.conversation_history:
	            if message.get("role") != "system":
	                dialogue.append(message)
	            elif self._is_image_context(message):
	                latest_context = message
	            else:
	                base_messages.append(message)

	        trimmed = base_messages
	        if latest_context is not None:
	            trimmed.append(latest_context)
	        trimmed.extend(dialogue[-self.RECENT_MESSAGES_KEPT:])
	        self.conversation_history = trimmed

	    def chat(self, user_message, include_voice=True):
	        """
	        Process user message and return response with optional voice

	        Args:
	            user_message: The user's input message
	            include_voice: Whether to generate voice response

	        Returns:
	            tuple: (text_response, voice_file_path or None). If the API call
	            fails, the text is "Chat error: <reason>" and the path is None;
	            the user message stays in the history in that case.
	        """
	        # Add user message to history
	        self.conversation_history.append({
	            "role": "user",
	            "content": user_message
	        })

	        # Keep conversation history manageable
	        self._trim_history()

	        try:
	            # Get response from OpenAI
	            response = client.chat.completions.create(
	                model="gpt-4o-mini",
	                messages=self.conversation_history,
	                temperature=0.7,
	                max_tokens=500
	            )
	            text_response = response.choices[0].message.content
	        except Exception as e:
	            # Boundary with the external service: report instead of raising
	            return f"Chat error: {str(e)}", None

	        # Add assistant response to history
	        self.conversation_history.append({
	            "role": "assistant",
	            "content": text_response
	        })

	        # Generate voice if requested (generate_voice never raises)
	        voice_file = self.generate_voice(text_response) if include_voice else None
	        return text_response, voice_file

	    def generate_voice(self, text):
	        """Generate voice narration for text using OpenAI TTS.

	        Args:
	            text: The text to narrate.

	        Returns:
	            Path of a temporary .mp3 file, or None when text is empty or
	            speech generation fails. The file is created with delete=False,
	            so it is not removed automatically.
	        """
	        if not text:
	            # Nothing to narrate; skip the API call
	            return None

	        try:
	            # Generate speech using OpenAI TTS
	            response = client.audio.speech.create(
	                model="tts-1",
	                voice="nova",  # Options: alloy, echo, fable, onyx, nova, shimmer
	                input=text,
	                response_format="mp3"
	            )

	            # Save to temporary file
	            with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
	                temp_audio.write(response.content)
	                return temp_audio.name

	        except Exception as e:
	            # Voice is best-effort: report the problem and return no file
	            print(f"Voice generation error: {e}")
	            return None

	    def get_conversation_summary(self):
	        """Return the user and assistant messages of the conversation (system messages excluded)."""
	        return [
	            msg for msg in self.conversation_history
	            if msg["role"] in ("user", "assistant")
	        ]

	    def reset_conversation(self):
	        """Reset conversation history while keeping system prompt"""
	        self.conversation_history = [{
	            "role": "system",
	            "content": self.system_prompt
	        }]
	        self.current_image_context = None

	    def save_conversation(self, filepath=None):
	        """Save conversation history to a JSON file.

	        Args:
	            filepath: Destination path. Defaults to
	                "conversation_<YYYYmmdd_HHMMSS>.json" in the current directory.

	        Returns:
	            The path that was written.
	        """
	        if filepath is None:
	            filepath = f"conversation_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"

	        payload = {
	            'timestamp': datetime.now().isoformat(),
	            'conversation': self.conversation_history,
	            'image_context': self.current_image_context
	        }
	        with open(filepath, 'w', encoding='utf-8') as f:
	            json.dump(payload, f, indent=2)

	        return filepath

	    def load_conversation(self, filepath):
	        """Load conversation history from a JSON file written by save_conversation.

	        Raises:
	            KeyError: If the file has no 'conversation' entry.
	        """
	        with open(filepath, 'r', encoding='utf-8') as f:
	            data = json.load(f)
	        self.conversation_history = data['conversation']
	        self.current_image_context = data.get('image_context')

	# Create a global chat agent instance at import time; the helper functions
	# below all operate on this single shared conversation
	chat_agent = ChatAgent()

	# Helper functions for easy integration
	def chat_with_agent(message, detected_objects=None, detailed_attributes=None, include_voice=True):
	    """
	    Send one message to the shared agent, refreshing its image context first.

	    Args:
	        message: User's message
	        detected_objects: List of detected objects (optional); the image
	            context is only updated when this is not None
	        detailed_attributes: Detailed attributes from enhanced detection (optional)
	        include_voice: Whether to generate voice response

	    Returns:
	        tuple: (text_response, voice_file_path or None)
	    """
	    has_new_detection = detected_objects is not None
	    if has_new_detection:
	        # New detection results arrived: tell the agent before it answers
	        chat_agent.update_image_context(detected_objects, detailed_attributes)

	    reply = chat_agent.chat(message, include_voice)
	    return reply

	def reset_chat():
	    """Clear the shared agent's conversation, keeping only its system prompt."""
	    chat_agent.reset_conversation()

	def get_chat_history():
	    """Return the shared agent's user and assistant messages."""
	    history = chat_agent.get_conversation_summary()
	    return history