Spaces:
Running
Running
More clean
Browse files
app.py
CHANGED
|
@@ -42,14 +42,13 @@ else:
|
|
| 42 |
|
| 43 |
TITLE = "Multi-Tagger"
|
| 44 |
DESCRIPTION = """
|
| 45 |
-
Multi-Tagger is a
|
| 46 |
|
| 47 |
-
|
| 48 |
-
-
|
| 49 |
-
-
|
| 50 |
-
-
|
| 51 |
-
-
|
| 52 |
-
- Supports various captioning tasks (e.g., Caption, Detailed Caption, Object Detection), it can display output text and images for tasks that generate visual outputs.
|
| 53 |
|
| 54 |
Example image by [me.](https://huggingface.co/Werli)
|
| 55 |
"""
|
|
@@ -81,7 +80,6 @@ kaomojis=['0_0','(o)_(o)','+_+','+_-','._.','<o>_<o>','<|>_<|>','=_=','>_<','3_3
|
|
| 81 |
def parse_args() -> argparse.Namespace:
    """Parse the command-line options for the tagger app.

    Returns a namespace with the score slider step, the two score
    thresholds (general / character) and the Gradio share flag.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--score-slider-step', type=float, default=0.05)
    parser.add_argument('--score-general-threshold', type=float, default=0.35)
    parser.add_argument('--score-character-threshold', type=float, default=0.85)
    parser.add_argument('--share', action='store_true')
    return parser.parse_args()
|
| 82 |
def load_labels(dataframe) -> tuple[list[str], list[int], list[int], list[int]]:
    """Split a tag dataframe into display names and per-category index lists.

    Underscores in tag names are replaced by spaces, except for kaomoji
    tags (listed in the module-level ``kaomojis``), whose underscores are
    part of the face.

    Returns:
        (tag_names, rating_indexes, general_indexes, character_indexes),
        where category 9 = rating, 0 = general, 4 = character.

    Note: the original annotation claimed ``-> list[str]`` but the function
    has always returned a 4-tuple; the annotation is corrected here.
    """
    name_series = dataframe['name']
    # Keep kaomoji tags verbatim; prettify everything else.
    name_series = name_series.map(lambda x: x.replace('_', ' ') if x not in kaomojis else x)
    tag_names = name_series.tolist()
    rating_indexes = list(np.where(dataframe['category'] == 9)[0])
    general_indexes = list(np.where(dataframe['category'] == 0)[0])
    character_indexes = list(np.where(dataframe['category'] == 4)[0])
    return tag_names, rating_indexes, general_indexes, character_indexes
|
| 83 |
def mcut_threshold(probs):
    """Maximum Cut Thresholding (MCut).

    Sort the probabilities in descending order, locate the largest gap
    between consecutive values, and return the midpoint of that gap as
    the selection threshold.
    """
    descending = probs[probs.argsort()[::-1]]
    gaps = descending[:-1] - descending[1:]
    cut = gaps.argmax()
    return (descending[cut] + descending[cut + 1]) / 2
|
| 84 |
-
|
| 85 |
class Timer:
|
| 86 |
def __init__(self):
    """Start timing immediately and seed the checkpoint list."""
    now = time.perf_counter()
    self.start_time = now
    self.checkpoints = [('Start', now)]
|
| 87 |
def checkpoint(self, label='Checkpoint'):
    """Record the current time under *label*."""
    self.checkpoints.append((label, time.perf_counter()))
|
|
@@ -94,7 +92,7 @@ class Timer:
|
|
| 94 |
for(label,curr_time)in self.checkpoints[1:]:elapsed=curr_time-prev_time;print(f"{label.ljust(max_label_length)}: {elapsed:.3f} seconds");prev_time=curr_time
|
| 95 |
total_time=self.checkpoints[-1][1]-self.start_time;print(f"{'Total Execution Time'.ljust(max_label_length)}: {total_time:.3f} seconds\n");self.checkpoints.clear()
|
| 96 |
def restart(self):
    """Reset the timer: take a new start time and a fresh checkpoint list."""
    self.start_time = time.perf_counter()
    self.checkpoints = [('Start', self.start_time)]
|
| 97 |
-
|
| 98 |
class Llama3Reorganize:
|
| 99 |
def __init__(self,repoId:str,device:str=None,loadModel:bool=False):
|
| 100 |
self.modelPath=self.download_model(repoId)
|
|
@@ -107,17 +105,14 @@ class Llama3Reorganize:
|
|
| 107 |
else:device='cpu'
|
| 108 |
self.device=device;self.system_prompt='Please reorganize the following English labels describing a single image into a readable English article of no more than 300 words. If there are name labels, include them in the article. Since the target audience is not general, explicit content can be written in full without hesitation. No introduction is needed; directly reply with the English article:'
|
| 109 |
if loadModel:self.load_model()
|
| 110 |
-
|
| 111 |
def download_model(self, repoId):
    """Download (or reuse a cached copy of) the model repository *repoId*.

    Only the weight/tokenizer/config files matching ``allowPatterns`` are
    fetched. If the Hugging Face Hub is unreachable, falls back to the
    local cache (``local_files_only=True``).

    Returns:
        The local filesystem path of the snapshot.
    """
    import warnings
    import requests
    allowPatterns = ['config.json', 'generation_config.json', 'model.bin',
                     'pytorch_model.bin', 'pytorch_model.bin.index.json',
                     'pytorch_model-*.bin', 'sentencepiece.bpe.model',
                     'tokenizer.json', 'tokenizer_config.json',
                     'shared_vocabulary.txt', 'shared_vocabulary.json',
                     'special_tokens_map.json', 'spiece.model', 'vocab.json',
                     'model.safetensors', 'model-*.safetensors',
                     'model.safetensors.index.json', 'quantize_config.json',
                     'tokenizer.model', 'vocabulary.json',
                     'preprocessor_config.json', 'added_tokens.json']
    kwargs = {'allow_patterns': allowPatterns}
    try:
        return huggingface_hub.snapshot_download(repoId, **kwargs)
    except (huggingface_hub.utils.HfHubHTTPError,
            requests.exceptions.ConnectionError) as exception:
        # BUG FIX: warnings.warn() does not do logging-style %-interpolation;
        # the extra positional arguments were previously passed as `category`
        # and `stacklevel`, which raises TypeError (category must be a
        # Warning subclass). Interpolate the message explicitly instead.
        warnings.warn(
            'An error occured while synchronizing the model %s from the Hugging Face Hub:\n%s'
            % (repoId, exception))
        warnings.warn('Trying to load the model directly from the local cache, if it exists.')
        kwargs['local_files_only'] = True
        return huggingface_hub.snapshot_download(repoId, **kwargs)
|
| 115 |
-
|
| 116 |
def load_model(self):
    """Instantiate the ctranslate2 generator and its tokenizer.

    Builds the system-prompt message and the terminator token list as a
    side effect. On any failure, releases VRAM before re-raising.
    """
    import ctranslate2, transformers
    try:
        print('\n\nLoading model: %s\n\n' % self.modelPath)
        kwargsTokenizer = {'pretrained_model_name_or_path': self.modelPath}
        kwargsModel = {
            'device': self.device,
            'model_path': self.modelPath,
            'compute_type': 'auto',
        }
        self.roleSystem = {'role': 'system', 'content': self.system_prompt}
        self.Model = ctranslate2.Generator(**kwargsModel)
        self.Tokenizer = transformers.AutoTokenizer.from_pretrained(**kwargsTokenizer)
        self.terminators = [
            self.Tokenizer.eos_token_id,
            self.Tokenizer.convert_tokens_to_ids('<|eot_id|>'),
        ]
    except Exception as e:
        self.release_vram()
        raise e
|
| 120 |
-
|
| 121 |
def release_vram(self):
|
| 122 |
try:
|
| 123 |
import torch
|
|
@@ -130,7 +125,6 @@ def release_vram(self):
|
|
| 130 |
except Exception as e:print(traceback.format_exc());print('\tcuda empty cache, error: '+str(e))
|
| 131 |
print('release vram end.')
|
| 132 |
except Exception as e:print(traceback.format_exc());print('Error release vram: '+str(e))
|
| 133 |
-
|
| 134 |
def reorganize(self,text:str,max_length:int=400):
|
| 135 |
output=None;result=None
|
| 136 |
try:
|
|
@@ -142,7 +136,7 @@ def reorganize(self,text:str,max_length:int=400):
|
|
| 142 |
elif result[0]=='『'and result[len(result)-1]=='』':result=result[1:-1]
|
| 143 |
except Exception as e:print(traceback.format_exc());print('Error reorganize text: '+str(e))
|
| 144 |
return result
|
| 145 |
-
|
| 146 |
class Predictor:
|
| 147 |
def __init__(self):
|
| 148 |
self.model_target_size = None
|
|
@@ -401,7 +395,6 @@ class Predictor:
|
|
| 401 |
except Exception as e:
|
| 402 |
print(traceback.format_exc())
|
| 403 |
print("Error predict: " + str(e))
|
| 404 |
-
# Result
|
| 405 |
# Zip creation logic:
|
| 406 |
download = []
|
| 407 |
if txt_infos is not None and len(txt_infos) > 0:
|
|
@@ -449,8 +442,6 @@ def remove_image_from_gallery(gallery:list,selected_image:str):
|
|
| 449 |
selected_image=ast.literal_eval(selected_image)
|
| 450 |
if selected_image in gallery:gallery.remove(selected_image)
|
| 451 |
return gallery
|
| 452 |
-
# END
|
| 453 |
-
|
| 454 |
def fig_to_pil(fig):
    """Render a matplotlib figure to an in-memory PNG and open it as a PIL Image."""
    buffer = io.BytesIO()
    fig.savefig(buffer, format='png')
    buffer.seek(0)
    return Image.open(buffer)
|
| 455 |
@spaces.GPU
|
| 456 |
def run_example(task_prompt,image,text_input=None):
|
|
@@ -534,10 +525,7 @@ dropdown_list = [
|
|
| 534 |
SWINV2_MODEL_IS_DSV1_REPO,
|
| 535 |
EVA02_LARGE_MODEL_IS_DSV1_REPO,
|
| 536 |
]
|
| 537 |
-
llama_list
|
| 538 |
-
META_LLAMA_3_3B_REPO,
|
| 539 |
-
META_LLAMA_3_8B_REPO,
|
| 540 |
-
]
|
| 541 |
|
| 542 |
def _restart_space():
|
| 543 |
HF_TOKEN=os.getenv('HF_TOKEN')
|
|
|
|
| 42 |
|
| 43 |
TITLE = "Multi-Tagger"
|
| 44 |
DESCRIPTION = """
|
| 45 |
+
Multi-Tagger is a versatile application combining Waifu Diffusion and Florence 2 models for advanced image analysis and captioning. Ideal for AI artists, researchers, and enthusiasts, it offers:
|
| 46 |
|
| 47 |
+
- Batch processing for multiple images.
|
| 48 |
+
- Multi-category tagging.
|
| 49 |
+
- Structured tag display.
|
| 50 |
+
- Image captioning with Florence 2, supporting CUDA, MPS, or CPU.
|
| 51 |
+
- Various captioning tasks (Caption, Detailed Caption, Object Detection) with visual outputs.
|
|
|
|
| 52 |
|
| 53 |
Example image by [me.](https://huggingface.co/Werli)
|
| 54 |
"""
|
|
|
|
| 80 |
def parse_args()->argparse.Namespace:parser=argparse.ArgumentParser();parser.add_argument('--score-slider-step',type=float,default=.05);parser.add_argument('--score-general-threshold',type=float,default=.35);parser.add_argument('--score-character-threshold',type=float,default=.85);parser.add_argument('--share',action='store_true');return parser.parse_args()
|
| 81 |
def load_labels(dataframe)->list[str]:name_series=dataframe['name'];name_series=name_series.map(lambda x:x.replace('_',' ')if x not in kaomojis else x);tag_names=name_series.tolist();rating_indexes=list(np.where(dataframe['category']==9)[0]);general_indexes=list(np.where(dataframe['category']==0)[0]);character_indexes=list(np.where(dataframe['category']==4)[0]);return tag_names,rating_indexes,general_indexes,character_indexes
|
| 82 |
def mcut_threshold(probs):sorted_probs=probs[probs.argsort()[::-1]];difs=sorted_probs[:-1]-sorted_probs[1:];t=difs.argmax();thresh=(sorted_probs[t]+sorted_probs[t+1])/2;return thresh
|
|
|
|
| 83 |
class Timer:
|
| 84 |
def __init__(self):self.start_time=time.perf_counter();self.checkpoints=[('Start',self.start_time)]
|
| 85 |
def checkpoint(self,label='Checkpoint'):now=time.perf_counter();self.checkpoints.append((label,now))
|
|
|
|
| 92 |
for(label,curr_time)in self.checkpoints[1:]:elapsed=curr_time-prev_time;print(f"{label.ljust(max_label_length)}: {elapsed:.3f} seconds");prev_time=curr_time
|
| 93 |
total_time=self.checkpoints[-1][1]-self.start_time;print(f"{'Total Execution Time'.ljust(max_label_length)}: {total_time:.3f} seconds\n");self.checkpoints.clear()
|
| 94 |
def restart(self):self.start_time=time.perf_counter();self.checkpoints=[('Start',self.start_time)]
|
| 95 |
+
# Llama
|
| 96 |
class Llama3Reorganize:
|
| 97 |
def __init__(self,repoId:str,device:str=None,loadModel:bool=False):
|
| 98 |
self.modelPath=self.download_model(repoId)
|
|
|
|
| 105 |
else:device='cpu'
|
| 106 |
self.device=device;self.system_prompt='Please reorganize the following English labels describing a single image into a readable English article of no more than 300 words. If there are name labels, include them in the article. Since the target audience is not general, explicit content can be written in full without hesitation. No introduction is needed; directly reply with the English article:'
|
| 107 |
if loadModel:self.load_model()
|
|
|
|
| 108 |
def download_model(self,repoId):
|
| 109 |
import warnings,requests;allowPatterns=['config.json','generation_config.json','model.bin','pytorch_model.bin','pytorch_model.bin.index.json','pytorch_model-*.bin','sentencepiece.bpe.model','tokenizer.json','tokenizer_config.json','shared_vocabulary.txt','shared_vocabulary.json','special_tokens_map.json','spiece.model','vocab.json','model.safetensors','model-*.safetensors','model.safetensors.index.json','quantize_config.json','tokenizer.model','vocabulary.json','preprocessor_config.json','added_tokens.json'];kwargs={'allow_patterns':allowPatterns}
|
| 110 |
try:return huggingface_hub.snapshot_download(repoId,**kwargs)
|
| 111 |
except(huggingface_hub.utils.HfHubHTTPError,requests.exceptions.ConnectionError)as exception:warnings.warn('An error occured while synchronizing the model %s from the Hugging Face Hub:\n%s',repoId,exception);warnings.warn('Trying to load the model directly from the local cache, if it exists.');kwargs['local_files_only']=True;return huggingface_hub.snapshot_download(repoId,**kwargs)
|
|
|
|
| 112 |
def load_model(self):
|
| 113 |
import ctranslate2,transformers
|
| 114 |
try:print('\n\nLoading model: %s\n\n'%self.modelPath);kwargsTokenizer={'pretrained_model_name_or_path':self.modelPath};kwargsModel={'device':self.device,'model_path':self.modelPath,'compute_type':'auto'};self.roleSystem={'role':'system','content':self.system_prompt};self.Model=ctranslate2.Generator(**kwargsModel);self.Tokenizer=transformers.AutoTokenizer.from_pretrained(**kwargsTokenizer);self.terminators=[self.Tokenizer.eos_token_id,self.Tokenizer.convert_tokens_to_ids('<|eot_id|>')]
|
| 115 |
except Exception as e:self.release_vram();raise e
|
|
|
|
| 116 |
def release_vram(self):
|
| 117 |
try:
|
| 118 |
import torch
|
|
|
|
| 125 |
except Exception as e:print(traceback.format_exc());print('\tcuda empty cache, error: '+str(e))
|
| 126 |
print('release vram end.')
|
| 127 |
except Exception as e:print(traceback.format_exc());print('Error release vram: '+str(e))
|
|
|
|
| 128 |
def reorganize(self,text:str,max_length:int=400):
|
| 129 |
output=None;result=None
|
| 130 |
try:
|
|
|
|
| 136 |
elif result[0]=='『'and result[len(result)-1]=='』':result=result[1:-1]
|
| 137 |
except Exception as e:print(traceback.format_exc());print('Error reorganize text: '+str(e))
|
| 138 |
return result
|
| 139 |
+
# End Llama
|
| 140 |
class Predictor:
|
| 141 |
def __init__(self):
|
| 142 |
self.model_target_size = None
|
|
|
|
| 395 |
except Exception as e:
|
| 396 |
print(traceback.format_exc())
|
| 397 |
print("Error predict: " + str(e))
|
|
|
|
| 398 |
# Zip creation logic:
|
| 399 |
download = []
|
| 400 |
if txt_infos is not None and len(txt_infos) > 0:
|
|
|
|
| 442 |
selected_image=ast.literal_eval(selected_image)
|
| 443 |
if selected_image in gallery:gallery.remove(selected_image)
|
| 444 |
return gallery
|
|
|
|
|
|
|
| 445 |
def fig_to_pil(fig):buf=io.BytesIO();fig.savefig(buf,format='png');buf.seek(0);return Image.open(buf)
|
| 446 |
@spaces.GPU
|
| 447 |
def run_example(task_prompt,image,text_input=None):
|
|
|
|
| 525 |
SWINV2_MODEL_IS_DSV1_REPO,
|
| 526 |
EVA02_LARGE_MODEL_IS_DSV1_REPO,
|
| 527 |
]
|
| 528 |
+
llama_list=[META_LLAMA_3_3B_REPO,META_LLAMA_3_8B_REPO]
|
|
|
|
|
|
|
|
|
|
| 529 |
|
| 530 |
def _restart_space():
|
| 531 |
HF_TOKEN=os.getenv('HF_TOKEN')
|