Spaces:

Sachin5112
/

Image-To-Prompt

Sleeping

App Files Files Community

Image-To-Prompt / app.py

Sachin5112

Update app.py

ba7578e verified 16 days ago

raw

history blame contribute delete

3.46 kB

	import os
	import subprocess
	import sys

	import gradio as gr
	import torch
	from PIL import Image
	from transformers import AutoProcessor, AutoModelForCausalLM

	# Install flash-attn at startup, before the model is loaded (presumably the
	# remote model code pulled in by trust_remote_code=True imports it —
	# TODO confirm).
	#
	# FLASH_ATTENTION_SKIP_CUDA_BUILD is merged into a copy of the current
	# environment: passing a dict that holds only this key as `env` replaces the
	# entire environment of the child process (PATH, HOME, proxy settings, ...).
	# pip is invoked as `sys.executable -m pip` with an argument list so the
	# package lands in the interpreter running this app and no shell is involved.
	# The return code is deliberately not checked: the install stays best-effort.
	subprocess.run(
	    [sys.executable, "-m", "pip", "install", "flash-attn", "--no-build-isolation"],
	    env={**os.environ, "FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
	    check=False,
	)

	# Load the Florence-2 model and its processor once, at import time.
	# The model is moved to the GPU when CUDA is available (CPU otherwise) and
	# put into eval mode; the processor is loaded from the same checkpoint.
	_florence_checkpoint = 'microsoft/Florence-2-base'

	if torch.cuda.is_available():
	    device = "cuda"
	else:
	    device = "cpu"

	florence_model = AutoModelForCausalLM.from_pretrained(
	    _florence_checkpoint,
	    trust_remote_code=True,
	)
	florence_model = florence_model.to(device).eval()
	florence_processor = AutoProcessor.from_pretrained(
	    _florence_checkpoint,
	    trust_remote_code=True,
	)

	def generate_caption(image):
	    """Generate a detailed caption for an image with Florence-2.

	    Args:
	        image: A ``PIL.Image.Image``, any object accepted by
	            ``PIL.Image.fromarray``, or ``None``.

	    Returns:
	        The value stored under ``"<MORE_DETAILED_CAPTION>"`` in the
	        processor's post-processed output, or ``""`` when ``image`` is
	        ``None``.
	    """
	    if image is None:
	        return ""
	    if not isinstance(image, Image.Image):
	        image = Image.fromarray(image)
	    # Normalise grayscale / RGBA / palette inputs to three channels before
	    # preprocessing (assumes the processor does not convert on its own —
	    # TODO confirm against the checkpoint's preprocessor config).
	    if image.mode != "RGB":
	        image = image.convert("RGB")

	    # The same task token is used as the text prompt, as the post-processing
	    # task, and as the key of the parsed result.
	    task = "<MORE_DETAILED_CAPTION>"

	    inputs = florence_processor(text=task, images=image, return_tensors="pt").to(device)
	    # Pure inference: do not record autograd state for the forward pass.
	    with torch.no_grad():
	        generated_ids = florence_model.generate(
	            input_ids=inputs["input_ids"],
	            pixel_values=inputs["pixel_values"],
	            max_new_tokens=1024,
	            early_stopping=False,
	            do_sample=False,
	            num_beams=3,
	        )
	    # Special tokens are kept in the decoded text; it is handed unchanged to
	    # post_process_generation below.
	    generated_text = florence_processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
	    parsed_answer = florence_processor.post_process_generation(
	        generated_text,
	        task=task,
	        image_size=(image.width, image.height),
	    )
	    return parsed_answer[task]

	# Stylesheet handed to gr.Blocks(css=...) below. The class selectors match
	# the elem_classes used in the layout: "container" (outer column),
	# "generate-btn" (submit button) and "output-box" (result textbox); the
	# "header" rules style the <div class="header"> emitted through gr.HTML.
	css = """
	.container { max-width: 900px; margin: auto; padding-top: 2rem; }
	.header { text-align: center; margin-bottom: 2rem; }
	.header h1 { font-size: 2.5rem; font-weight: 800; color: #ffffff; margin-bottom: 0.5rem; }
	.header p { color: #a0a0a0; font-size: 1.1rem; }
	.generate-btn {
	background: linear-gradient(90deg, #4776E6 0%, #8E54E9 100%) !important;
	border: none !important;
	color: white !important;
	font-weight: bold !important;
	}
	.generate-btn:hover { transform: scale(1.02); transition: 0.2s; }
	.output-box { border-radius: 10px !important; background-color: #1a1a1a !important; }
	"""

	# Build the page: a single "container" column holding the header, a
	# two-column input/output row and an (currently empty) examples section,
	# then wire the button to the caption function.
	_app_theme = gr.themes.Soft(primary_hue="violet", secondary_hue="indigo")
	_header_html = """
	<div class="header">
	<h1>✨ Image to Prompt Studio</h1>
	<p>Upload an image to generate a highly detailed AI prompt using Florence-2.</p>
	</div>
	"""

	with gr.Blocks(css=css, theme=_app_theme) as demo:
	    with gr.Column(elem_classes="container"):
	        # Title and tagline
	        gr.HTML(_header_html)

	        with gr.Row():
	            # Left half: image upload plus the trigger button
	            with gr.Column(scale=1):
	                input_img = gr.Image(type="pil", label="Upload Image")
	                submit_btn = gr.Button(
	                    "Generate Prompt",
	                    elem_classes="generate-btn",
	                    variant="primary",
	                )

	            # Right half: the generated prompt, with a copy button
	            with gr.Column(scale=1):
	                output_text = gr.Textbox(
	                    lines=8,
	                    label="Generated Prompt",
	                    placeholder="Your prompt will appear here...",
	                    elem_classes="output-box",
	                    show_copy_button=True,
	                )

	        # Optional example images; add file paths to the list to enable them
	        gr.Examples(inputs=input_img, examples=[])

	    # Clicking the button sends the uploaded image through generate_caption
	    # and writes the returned text into the output box.
	    submit_btn.click(fn=generate_caption, inputs=[input_img], outputs=[output_text])

	# Start the Gradio server only when this file is executed as a script;
	# importing the module builds `demo` without launching it.
	if __name__ == "__main__":
	    demo.launch(debug=True)