Spaces:

Tongbo
/

flashsloth

Runtime error

App Files Files Community

flashsloth / app.py

Tongbo

Update app.py

c545e2f verified 11 months ago

raw

history blame contribute delete

5.71 kB

	import os
	import torch
	from flashsloth.constants import (
	IMAGE_TOKEN_INDEX, DEFAULT_IMAGE_TOKEN, DEFAULT_IM_START_TOKEN,
	DEFAULT_IM_END_TOKEN, LEARNABLE_TOKEN, LEARNABLE_TOKEN_INDEX
	)
	from flashsloth.conversation import conv_templates, SeparatorStyle
	from flashsloth.model.builder import load_pretrained_model
	from flashsloth.utils import disable_torch_init
	from flashsloth.mm_utils import (
	tokenizer_image_token, process_images, process_images_hd_inference,
	get_model_name_from_path, KeywordsStoppingCriteria
	)
	from PIL import Image
	import gradio as gr

	from transformers import TextIteratorStreamer
	from threading import Thread

	disable_torch_init()

	MODEL_PATH_HD = "Tongbo/FlashSloth_HD-3.2B"
	MODEL_PATH_NEW = "Tongbo/FlashSloth-3.2B"

	model_name_hd = get_model_name_from_path(MODEL_PATH_HD)
	model_name_new = get_model_name_from_path(MODEL_PATH_NEW)

	models = {
	"FlashSloth HD": load_pretrained_model(MODEL_PATH_HD, None, model_name_hd),
	"FlashSloth": load_pretrained_model(MODEL_PATH_NEW, None, model_name_new)
	}

	for key in models:
	tokenizer, model, image_processor, context_len = models[key]
	model.to('cuda')
	model.eval()

	def generate_description(image, prompt_text, temperature, top_p, max_tokens, selected_model):
	"""
	生成图片描述的函数，支持流式输出，并根据选择的模型进行处理。
	新增参数:
	- selected_model: 用户选择的模型名称
	"""
	keywords = ['</s>']

	tokenizer, model, image_processor, context_len = models[selected_model]

	text = DEFAULT_IMAGE_TOKEN + '\n' + prompt_text
	text = text + LEARNABLE_TOKEN

	image = image.convert('RGB')
	if model.config.image_hd:
	image_tensor = process_images_hd_inference([image], image_processor, model.config)[0]
	else:
	image_tensor = process_images([image], image_processor, model.config)[0]
	image_tensor = image_tensor.unsqueeze(0).to(dtype=torch.float16, device='cuda', non_blocking=True)

	conv = conv_templates["phi2"].copy()
	conv.append_message(conv.roles[0], text)
	conv.append_message(conv.roles[1], None)
	prompt = conv.get_prompt()

	input_ids = tokenizer_image_token(prompt, tokenizer, IMAGE_TOKEN_INDEX, return_tensors='pt')
	input_ids = input_ids.unsqueeze(0).to(device='cuda', non_blocking=True)

	stopping_criteria = KeywordsStoppingCriteria(keywords, tokenizer, input_ids)

	streamer = TextIteratorStreamer(
	tokenizer=tokenizer,
	skip_prompt=True,
	skip_special_tokens=True
	)

	generation_kwargs = dict(
	inputs=input_ids,
	images=image_tensor,
	do_sample=True,
	temperature=temperature,
	top_p=top_p,
	max_new_tokens=int(max_tokens),
	use_cache=True,
	eos_token_id=tokenizer.eos_token_id,
	stopping_criteria=[stopping_criteria],
	streamer=streamer
	)

	def _generate():
	with torch.inference_mode():
	model.generate(**generation_kwargs)

	generation_thread = Thread(target=_generate)
	generation_thread.start()

	partial_text = ""
	for new_text in streamer:
	partial_text += new_text
	yield partial_text

	generation_thread.join()

	custom_css = """
	<style>
	/* 增大标题字体 */
	#title {
	font-size: 80px !important;
	text-align: center;
	margin-bottom: 20px;
	}

	/* 增大描述文字字体 */
	#description {
	font-size: 24px !important;
	text-align: center;
	margin-bottom: 40px;
	}

	/* 增大标签和输入框的字体 */
	.gradio-container * {
	font-size: 18px !important;
	}

	/* 增大按钮字体 */
	button {
	font-size: 20px !important;
	padding: 10px 20px;
	}

	/* 增大输出文本的字体 */
	.output_text {
	font-size: 20px !important;
	}
	</style>
	"""

	with gr.Blocks(css=custom_css) as demo:
	gr.HTML(custom_css)
	gr.HTML("<h1 style='font-size:70px; text-align:center;'>FlashSloth 多模态大模型 Demo</h1>")

	with gr.Row():
	with gr.Column(scale=1):
	image_input = gr.Image(type="pil", label="上传图片")

	temperature_slider = gr.Slider(
	minimum=0.01,
	maximum=1.0,
	step=0.05,
	value=0.7,
	label="Temperature"
	)
	topp_slider = gr.Slider(
	minimum=0.01,
	maximum=1.0,
	step=0.05,
	value=0.9,
	label="Top-p"
	)
	maxtoken_slider = gr.Slider(
	minimum=64,
	maximum=3072,
	step=1,
	value=3072,
	label="Max Tokens"
	)

	model_dropdown = gr.Dropdown(
	choices=list(models.keys()),
	value=list(models.keys())[0],
	label="选择模型",
	type="value"
	)

	with gr.Column(scale=1):
	prompt_input = gr.Textbox(
	lines=3,
	placeholder="Describe this photo in detail.",
	label="问题提示"
	)
	submit_button = gr.Button("生成答案", variant="primary")

	output_text = gr.Textbox(
	label="生成的答案",
	interactive=False,
	lines=15,
	elem_classes=["output_text"]
	)

	submit_button.click(
	fn=generate_description,
	inputs=[image_input, prompt_input, temperature_slider, topp_slider, maxtoken_slider, model_dropdown],
	outputs=output_text,
	show_progress=True
	)

	if __name__ == "__main__":
	demo.queue().launch()