# multimodal_agent / main.py
# laxminarasimha6's picture
# Update main.py
# 5529d68 verified
import os
import random
import uuid
import time
import asyncio
from threading import Thread
import gradio as gr
import torch
from PIL import Image
from models import (
load_base_model,
load_vision_model,
load_image_model,
load_shape_model,
generate_3d_model
)
from utils import (
clean_history,
save_image,
text_to_speech,
web_search
)
from config import (
MODEL_CONFIG,
COMMANDS,
VOICE_OPTIONS,
MAX_TOKENS,
DEFAULT_TOKENS
)
def setup_interface(description, css):
    """Build and return the NexusAI Gradio chat interface.

    Loads the base/vision/image/shape models once at startup and wires a
    streaming ``generate`` handler that routes chat messages to the image,
    voice, web-search, or 3D-generation pipelines based on command prefixes
    from ``COMMANDS``.

    Args:
        description: Markdown/HTML description shown under the title.
        css: Custom CSS string applied to the interface.

    Returns:
        A configured ``gr.ChatInterface`` instance (not launched).
    """
    # Load models once; the nested handlers close over them.
    base_model, tokenizer = load_base_model(MODEL_CONFIG["base_model"])
    vision_model, processor = load_vision_model(MODEL_CONFIG["vision_model"])
    image_model = load_image_model(MODEL_CONFIG["image_model"])
    shape_model = load_shape_model(MODEL_CONFIG["shape_model"])

    async def generate_voice(text, voice):
        """Synthesize *text* with *voice*.

        Returns an audio payload dict on success, otherwise a user-facing
        error string (never raises to the caller).
        """
        try:
            audio_file = await text_to_speech(text, voice)
            if audio_file and os.path.exists(audio_file):
                return {"type": "audio", "value": audio_file}
            return "Sorry, voice generation failed. Please try again."
        except Exception as e:
            return f"Error generating voice: {str(e)}"

    def generate(
        message: str,
        chat_history: list[dict],
        max_new_tokens: int = DEFAULT_TOKENS,
        temperature: float = 0.7,
        top_p: float = 0.9,
        top_k: int = 50,
        repetition_penalty: float = 1.2,
    ):
        """Route a chat message to the matching handler, streaming results.

        Yields intermediate status strings followed by the final response.
        Sampling parameters are currently unused by the placeholder text
        path but are part of the ChatInterface contract.
        """
        try:
            # Normalize once. The original sliced the *unstripped* message
            # after checking startswith() on the stripped one, so leading
            # whitespace corrupted the extracted prompt for every command.
            msg = message.strip()
            lowered = msg.lower()

            # Handle Image Generation
            if lowered.startswith(COMMANDS["image"]):
                prompt = msg[len(COMMANDS["image"]):].strip()
                yield "🎨 Generating image..."
                try:
                    image = image_model(
                        prompt=prompt,
                        num_inference_steps=30,
                        guidance_scale=7.5,
                    ).images[0]
                    image_path = save_image(image)
                    yield f"![Generated Image]({image_path})"
                except Exception as e:
                    yield f"Error generating image: {str(e)}"
                return

            # Handle Voice Synthesis (@voice1 / @voice2)
            voice_command = COMMANDS["voice"]
            if lowered.startswith(voice_command):
                # Read the voice index from the character right after the
                # command, instead of searching the whole message (which
                # matched "@voice1" mentioned anywhere in the text).
                digit = lowered[len(voice_command):len(voice_command) + 1]
                if digit not in ("1", "2"):
                    # Previously this fell through silently to the text echo.
                    yield (
                        f"Please pick a voice: `{voice_command}1` or "
                        f"`{voice_command}2`."
                    )
                    return
                voice = VOICE_OPTIONS[int(digit) - 1]
                text = msg[len(voice_command) + 1:].strip()
                yield "🎀 Generating voice..."
                # Run the coroutine on a dedicated loop and always close it;
                # the original could leak freshly created loops and called
                # run_until_complete() on a loop already marked running.
                loop = asyncio.new_event_loop()
                try:
                    result = loop.run_until_complete(generate_voice(text, voice))
                finally:
                    loop.close()
                yield result
                return

            # Handle Web Search
            if lowered.startswith(COMMANDS["search"]):
                query = msg[len(COMMANDS["search"]):].strip()
                yield "πŸ” Searching..."
                yield web_search(query)
                return

            # Handle 3D Generation
            if lowered.startswith(COMMANDS["shape"]):
                prompt = msg[len(COMMANDS["shape"]):].strip()
                yield "🌟 Creating 3D model..."
                model_path = generate_3d_model(shape_model, prompt)
                if model_path:
                    yield f"[Download 3D Model]({model_path})"
                else:
                    yield "Sorry, 3D model generation failed. Please try again."
                return

            # Default text response (placeholder — real LLM path not wired yet)
            yield "Processing your message..."
            yield f"I received your message: {message}"
        except Exception as e:
            # Top-level boundary: surface any unexpected failure to the chat.
            yield f"An error occurred: {str(e)}"

    # Create Gradio Interface
    demo = gr.ChatInterface(
        fn=generate,
        additional_inputs=[
            gr.Slider(label="Response Length", minimum=1, maximum=MAX_TOKENS, value=DEFAULT_TOKENS),
            gr.Slider(label="Creativity", minimum=0.1, maximum=1.0, value=0.7),
            gr.Slider(label="Focus", minimum=0.05, maximum=1.0, value=0.9),
            gr.Slider(label="Diversity", minimum=1, maximum=100, value=50),
            gr.Slider(label="Consistency", minimum=1.0, maximum=2.0, value=1.2),
        ],
        title="NexusAI",
        description=description,
        theme=gr.themes.Soft(),
        css=css,
        examples=[
            ["@voice2 Tell me about quantum computing"],
            ["@create A beautiful sunset over mountains"],
            ["@search Latest developments in AI"],
            ["Tell me a story about space exploration"],
        ]
    )
    return demo