Spaces:

Amitesh007
/

Dataset_Creator

Runtime error

App Files Files Community

Dataset_Creator / app.py

Amitesh007

Update app.py

32feaab verified about 1 month ago

raw

history blame contribute delete

4.18 kB


	import base64
	import io
	import os
	import tempfile

	import gradio as gr
	import numpy as np
	import requests
	from PIL import Image

	# ==============================
	# Configuration
	# ==============================

	HF_TOKEN = os.getenv("HF_TOKEN")  # optional but recommended

	# Example models (can be extended). Each entry maps the display name shown
	# in the UI to the modality it handles ("text", "vision" or "audio") and
	# the endpoint URL that requests are POSTed to.
	MODEL_REGISTRY = {
	    "Text - Mistral 7B Instruct": {
	        "type": "text",
	        "endpoint": "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.2",
	    },
	    "Text - Llama 3 8B Instruct": {
	        "type": "text",
	        "endpoint": "https://api-inference.huggingface.co/models/meta-llama/Meta-Llama-3-8B-Instruct",
	    },
	    "Vision - LLaVA": {
	        "type": "vision",
	        "endpoint": "https://api-inference.huggingface.co/models/llava-hf/llava-1.5-7b-hf",
	    },
	    "Audio - Whisper": {
	        "type": "audio",
	        "endpoint": "https://api-inference.huggingface.co/models/openai/whisper-large-v3",
	    },
	}

	# Shared HTTP headers for every request. The Authorization header is only
	# present when a token is configured, so the mapping never holds a None
	# value that a consumer would have to filter out.
	headers = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}



	# Helper Functions


	def query_text_model(endpoint, prompt, timeout=60):
	    """Send ``prompt`` to a text model endpoint and return its reply.

	    Args:
	        endpoint: URL that the JSON payload ``{"inputs": prompt}`` is
	            POSTed to.
	        prompt: Text forwarded as the model input.
	        timeout: Value passed to ``requests.post`` as ``timeout`` so a
	            stalled request cannot block the caller forever.

	    Returns:
	        The ``generated_text`` field of the first element of the JSON
	        response when the response has that shape. Otherwise the decoded
	        JSON body rendered with ``str()``, or the raw response text when
	        the body is not valid JSON.

	    Raises:
	        requests.RequestException: If the HTTP request itself fails or
	            times out.
	    """
	    payload = {"inputs": prompt}
	    response = requests.post(
	        endpoint, headers=headers, json=payload, timeout=timeout
	    )

	    # Decode once; a non-JSON body (e.g. an HTML error page) is returned
	    # verbatim instead of raising a second time from the fallback path.
	    try:
	        body = response.json()
	    except ValueError:
	        return response.text

	    # Any other JSON shape (such as an error object) is shown as-is.
	    try:
	        return body[0]["generated_text"]
	    except (KeyError, IndexError, TypeError):
	        return str(body)


	def query_vision_model(endpoint, prompt, image, timeout=60):
	    """Send an image plus a text prompt to a vision model endpoint.

	    Args:
	        endpoint: URL that the JSON payload is POSTed to.
	        prompt: Text sent alongside the image under the ``"text"`` key.
	        image: Object with a PIL-style ``save(fp, format=...)`` method; the
	            UI supplies a PIL image.
	        timeout: Value passed to ``requests.post`` as ``timeout`` so a
	            stalled request cannot block the caller forever.

	    Returns:
	        The decoded JSON body of the response.

	    Raises:
	        requests.RequestException: If the HTTP request fails or times out.
	        ValueError: If the response body is not valid JSON.
	    """
	    # Encode the PNG in memory: the previous temp file was created with
	    # delete=False and never closed or removed, leaking one file per call.
	    buffer = io.BytesIO()
	    image.save(buffer, format="PNG")

	    payload = {
	        "inputs": {
	            "image": base64.b64encode(buffer.getvalue()).decode("utf-8"),
	            "text": prompt,
	        }
	    }

	    response = requests.post(
	        endpoint, headers=headers, json=payload, timeout=timeout
	    )
	    return response.json()


	def query_audio_model(endpoint, audio_path, timeout=60):
	    """Upload the raw bytes of an audio file to an audio model endpoint.

	    Args:
	        endpoint: URL that the file contents are POSTed to.
	        audio_path: Path of the audio file to read and upload.
	        timeout: Value passed to ``requests.post`` as ``timeout`` so a
	            stalled request cannot block the caller forever.

	    Returns:
	        The decoded JSON body of the response.

	    Raises:
	        OSError: If ``audio_path`` cannot be opened or read.
	        requests.RequestException: If the HTTP request fails or times out.
	        ValueError: If the response body is not valid JSON.
	    """
	    with open(audio_path, "rb") as audio_file:
	        data = audio_file.read()

	    response = requests.post(
	        endpoint, headers=headers, data=data, timeout=timeout
	    )
	    return response.json()


	# Main Chat Function


	def multimodal_chat(prompt, image, audio, selected_models, history):
	    """Query every selected model and append the combined reply to history.

	    Args:
	        prompt: Text entered by the user.
	        image: Image passed to vision models, or ``None`` when not provided.
	        audio: Audio file path passed to audio models, or ``None`` when not
	            provided.
	        selected_models: Iterable of ``MODEL_REGISTRY`` keys to query;
	            ``None`` is treated as an empty selection.
	        history: List of ``(user_message, bot_message)`` pairs. It is
	            extended in place; ``None`` starts a fresh list.

	    Returns:
	        A 4-tuple ``(history, "", None, None)``: the updated history for the
	        chatbot, followed by the values that reset the text, image and
	        audio inputs.
	    """
	    history = [] if history is None else history
	    outputs = {}

	    for model_name in selected_models or []:
	        # One failing or unknown model must not abort the whole comparison,
	        # so every per-model error is reported as that model's result.
	        try:
	            model = MODEL_REGISTRY[model_name]
	            kind = model["type"]

	            if kind == "text":
	                result = query_text_model(model["endpoint"], prompt)
	            elif kind == "vision" and image is not None:
	                result = query_vision_model(model["endpoint"], prompt, image)
	            elif kind == "audio" and audio is not None:
	                result = query_audio_model(model["endpoint"], audio)
	            else:
	                result = "Unsupported input for this model"
	        except Exception as e:
	            result = f"Error: {e}"

	        outputs[model_name] = result

	    # The chatbot needs a string for the bot message, not a dict, so the
	    # per-model results are rendered into one Markdown reply.
	    if outputs:
	        reply = "\n\n".join(
	            f"**{name}**\n\n{result}" for name, result in outputs.items()
	        )
	    else:
	        reply = "No models selected."

	    history.append((prompt, reply))

	    return history, "", None, None



	# UI


	# Page layout: a wide column with the conversation and the inputs, and a
	# narrow column with the model selection and the clear button.
	with gr.Blocks(theme=gr.themes.Soft(), title="Multimodal Model Comparison") as demo:

	    gr.Markdown("""
	# Multimodal Chat + Model Comparison
	Compare HuggingFace models across modalities
	""")

	    with gr.Row():

	        with gr.Column(scale=3):
	            chatbot = gr.Chatbot(height=500)

	            prompt = gr.Textbox(
	                placeholder="Enter your prompt...",
	                label="Text Input"
	            )

	            with gr.Row():
	                image_input = gr.Image(type="pil", label="Image Input")
	                audio_input = gr.Audio(type="filepath", label="Audio Input")

	            submit = gr.Button("Send")

	        with gr.Column(scale=1):
	            gr.Markdown("### Model Selection")

	            model_selector = gr.CheckboxGroup(
	                choices=list(MODEL_REGISTRY.keys()),
	                value=["Text - Mistral 7B Instruct"],
	                label="Select Models"
	            )

	            clear = gr.Button("Clear")

	    # Conversation history kept between button clicks.
	    state = gr.State([])

	    # Send: run the selected models, show the history in the chatbot, and
	    # reset the text, image and audio inputs.
	    submit.click(
	        multimodal_chat,
	        inputs=[prompt, image_input, audio_input, model_selector, state],
	        outputs=[chatbot, prompt, image_input, audio_input]
	    )

	    # Clear: reset the stored history together with the chatbot. Clearing
	    # only the chatbot would make the old turns reappear on the next send.
	    clear.click(
	        lambda: ([], []),
	        None,
	        [chatbot, state]
	    )



	# Launch


	if __name__ == "__main__":
	    # Start the Gradio app only when this file is executed as a script.
	    demo.launch()