Spaces:

karouswissem
/

captionnn

Runtime error

App Files Files Community

captionnn / app.py

karouswissem

Update app.py

77d0fe1 verified about 1 year ago

raw

history blame contribute delete

2.6 kB

	import gradio as gr
	from transformers import AutoModelForCausalLM, AutoTokenizer
	from PIL import Image
	import torch
	import yake
	import requests
	from io import BytesIO

	# Checkpoint identifier shared by the model and its tokenizer.
	MODEL_ID = "deepseek-ai/Janus-Pro-7B"

	# Both objects are created once at import time and reused by the functions below.
	# NOTE(review): the hosting page reports "Runtime error" — confirm that this
	# checkpoint actually loads through AutoModelForCausalLM in this environment.
	model = AutoModelForCausalLM.from_pretrained(MODEL_ID)
	tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

	# Placeholder captioner: no vision model is wired in yet, so every image
	# yields the same fixed sentence. Replace the body with a real captioning
	# model (e.g. BLIP) when one is available.
	def process_image_for_caption(image):
	    """Return a caption for ``image``.

	    The ``image`` argument is currently ignored; a hard-coded dummy caption
	    is returned for every call.
	    """
	    placeholder_caption = "A person holding a book in a library."
	    return placeholder_caption

	# Caption enhancement through the module-level Janus-Pro-7B model
	def enhance_caption_with_janus(caption):
	    """Run ``caption`` through the module-level model and return the decoded text.

	    The caption is tokenized into PyTorch tensors, handed to
	    ``model.generate`` with ``max_length=100``, and the first returned
	    sequence is decoded with special tokens stripped.
	    """
	    encoded = tokenizer(caption, return_tensors="pt")
	    generated = model.generate(**encoded, max_length=100)
	    first_sequence = generated[0]
	    return tokenizer.decode(first_sequence, skip_special_tokens=True)

	# Keyword extraction with YAKE and task-name construction
	class YakeTaskGenerator:
	    """Derive a short task name from a caption using YAKE keywords."""

	    def __init__(self, n=2, top_k=3):
	        """Create the keyword extractor.

	        ``n`` and ``top_k`` are forwarded to ``yake.KeywordExtractor`` as its
	        ``n`` and ``top`` arguments.
	        """
	        self.kw_extractor = yake.KeywordExtractor(n=n, top=top_k)

	    def extract_keywords(self, caption):
	        """Return the first element of every entry the extractor yields for ``caption``."""
	        phrases = []
	        for entry in self.kw_extractor.extract_keywords(caption):
	            phrases.append(entry[0])
	        return phrases

	    def generate_task_name(self, caption):
	        """Build a task name from the first two keywords of ``caption``.

	        Returns ``"General Image Processing"`` when no keywords are found.
	        """
	        found = self.extract_keywords(caption)
	        if found:
	            joined = " ".join(found[:2])
	            # str.capitalize upper-cases the first character and lower-cases the rest.
	            return f"{joined.capitalize()} Analysis"
	        return "General Image Processing"

	# End-to-end pipeline: caption -> enhanced caption -> task name
	def process_image_and_generate_task(image):
	    """Caption ``image``, enhance the caption, and return the derived task name."""
	    enhanced = enhance_caption_with_janus(process_image_for_caption(image))
	    # A fresh generator with default settings is built for every call.
	    return YakeTaskGenerator().generate_task_name(enhanced)

	# Callback handed to the Gradio interface
	def gradio_interface(image):
	    """Return the task name generated for ``image`` by the full pipeline."""
	    return process_image_and_generate_task(image)

	# UI components: a PIL image upload in, a text box out.
	image_input = gr.Image(type="pil", label="Upload Image")
	output = gr.Textbox(label="Generated Task Name")

	# Assemble the interface first, then start serving it.
	demo = gr.Interface(
	    title="Image Captioning and Task Name Generation with Janus-Pro-7B",
	    fn=gradio_interface,
	    inputs=image_input,
	    outputs=output,
	    live=True,
	)
	demo.launch(share=True, debug=True)