Spaces:

aiivar
/

Transformers

Sleeping

App Files Files Community

MinAA committed on Jan 8

Commit

859c222

1 Parent(s): 3832d6e

init

Browse files

Files changed (1) hide show

app.py +110 -29

app.py CHANGED Viewed

@@ -11,24 +11,90 @@ import warnings
 import time
 import inspect
 from datetime import datetime
 warnings.filterwarnings("ignore")
-# Кэш для хранения загруженных моделей
-model_cache = {}
 # История выполнения моделей
 history = []
 MAX_HISTORY_SIZE = 50
 def get_pipeline(task, model_name, **kwargs):
-    """Загрузка pipeline с кэшированием"""
     cache_key = f"{task}_{model_name}"
-    if cache_key not in model_cache:
         try:
-            model_cache[cache_key] = pipeline(task, model=model_name, **kwargs)
         except Exception as e:
             raise Exception(f"Ошибка загрузки модели: {str(e)}")
-    return model_cache[cache_key]
 def measure_time_and_save(task_name):
     """Декоратор для измерения времени выполнения и сохранения в историю"""
@@ -214,8 +280,9 @@ def audio_classifier(audio, model_name):
     try:
         classifier = get_pipeline("audio-classification", model_name)
         result = classifier(audio)
-        if isinstance(result, list):
-            result = result[0]
         output = "Результаты классификации:\n"
         for item in result[:5]:
             output += f"{item['label']}: {item['score']:.4f}\n"
@@ -230,12 +297,14 @@ def audio_zero_shot_classifier(audio, candidate_labels, model_name):
         # Используем CLAP для zero-shot классификации аудио
         from transformers import ClapProcessor, ClapModel
         cache_key = f"audio_zero_shot_{model_name}"
-        if cache_key not in model_cache:
             processor = ClapProcessor.from_pretrained(model_name)
             model = ClapModel.from_pretrained(model_name)
-            model_cache[cache_key] = (processor, model)
-        processor, model = model_cache[cache_key]
         labels = [label.strip() for label in candidate_labels.split(",")]
         inputs = processor(text=labels, audios=audio, return_tensors="pt", padding=True)
@@ -273,15 +342,17 @@ def speech_synthesis(text, model_name):
             from datasets import load_dataset
             cache_key = f"tts_{model_name}"
-            if cache_key not in model_cache:
                 processor = SpeechT5Processor.from_pretrained(model_name)
                 model = SpeechT5ForTextToSpeech.from_pretrained(model_name)
                 vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
                 embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
                 speaker_embeddings = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)
-                model_cache[cache_key] = (processor, model, vocoder, speaker_embeddings)
-            processor, model, vocoder, speaker_embeddings = model_cache[cache_key]
             inputs = processor(text=text, return_tensors="pt")
             with torch.no_grad():
                 speech = model.generate_speech(inputs["input_ids"], speaker_embeddings, vocoder=vocoder)
@@ -331,12 +402,14 @@ def image_text_matching(image, text, model_name):
     """Сопоставление изображения и текста"""
     try:
         cache_key = f"clip_{model_name}"
-        if cache_key not in model_cache:
             processor = CLIPProcessor.from_pretrained(model_name)
             model = CLIPModel.from_pretrained(model_name)
-            model_cache[cache_key] = (processor, model)
-        processor, model = model_cache[cache_key]
         inputs = processor(text=[text], images=image, return_tensors="pt", padding=True)
         with torch.no_grad():
@@ -355,12 +428,14 @@ def image_captioning(image, model_name):
     try:
         if "blip" in model_name.lower():
             cache_key = f"caption_blip_{model_name}"
-            if cache_key not in model_cache:
                 processor = BlipProcessor.from_pretrained(model_name)
                 model = BlipForConditionalGeneration.from_pretrained(model_name)
-                model_cache[cache_key] = (processor, model)
-            processor, model = model_cache[cache_key]
             inputs = processor(image, return_tensors="pt")
             out = model.generate(**inputs, max_length=50)
             caption = processor.decode(out[0], skip_special_tokens=True)
@@ -380,12 +455,14 @@ def visual_qa(image, question, model_name):
     try:
         if "vilt" in model_name.lower():
             cache_key = f"vqa_vilt_{model_name}"
-            if cache_key not in model_cache:
                 processor = ViltProcessor.from_pretrained(model_name)
                 model = ViltForQuestionAnswering.from_pretrained(model_name)
-                model_cache[cache_key] = (processor, model)
-            processor, model = model_cache[cache_key]
             inputs = processor(image, question, return_tensors="pt")
             outputs = model(**inputs)
             logits = outputs.logits
@@ -394,12 +471,14 @@ def visual_qa(image, question, model_name):
             return f"Ответ: {answer}"
         elif "blip" in model_name.lower():
             cache_key = f"vqa_blip_{model_name}"
-            if cache_key not in model_cache:
                 processor = BlipProcessor.from_pretrained(model_name)
                 model = BlipForConditionalGeneration.from_pretrained(model_name)
-                model_cache[cache_key] = (processor, model)
-            processor, model = model_cache[cache_key]
             inputs = processor(image, question, return_tensors="pt")
             out = model.generate(**inputs, max_length=50)
             answer = processor.decode(out[0], skip_special_tokens=True)
@@ -418,12 +497,14 @@ def image_zero_shot_classification(image, candidate_labels, model_name):
     """Zero-shot классификация изображений"""
     try:
         cache_key = f"clip_zs_{model_name}"
-        if cache_key not in model_cache:
             processor = CLIPProcessor.from_pretrained(model_name)
             model = CLIPModel.from_pretrained(model_name)
-            model_cache[cache_key] = (processor, model)
-        processor, model = model_cache[cache_key]
         labels = [label.strip() for label in candidate_labels.split(",")]
         inputs = processor(text=labels, images=image, return_tensors="pt", padding=True)

 import time
 import inspect
 from datetime import datetime
+from collections import OrderedDict
 warnings.filterwarnings("ignore")
+# LRU кэш для хранения загруженных моделей
class LRUCache:
    """Least-recently-used cache that bounds how many models are kept loaded.

    Entries are held in an ``OrderedDict`` ordered from least to most
    recently used. Reading an entry with ``get``/``[]`` or writing it with
    ``put``/``[]=`` marks it as most recently used. When a new key is added
    to a full cache, the least recently used entry is dropped first.
    """

    def __init__(self, maxsize=5):
        """Create an empty cache.

        Args:
            maxsize: Maximum number of entries to keep. A value of zero or
                less disables caching: ``put`` then stores nothing.
        """
        # Ordered from least recently used (first) to most recently used (last).
        self.cache = OrderedDict()
        self.maxsize = maxsize

    def get(self, key):
        """Return the value stored under ``key``, or ``None`` if it is absent.

        A hit marks the entry as most recently used.
        """
        if key not in self.cache:
            return None
        self.cache.move_to_end(key)
        return self.cache[key]

    def put(self, key, value):
        """Store ``value`` under ``key`` as the most recently used entry.

        If ``key`` is new and the cache is full, the least recently used
        entry is evicted first.
        """
        if key in self.cache:
            # Existing key: refresh its position and overwrite the value.
            self.cache.move_to_end(key)
            self.cache[key] = value
            return

        if self.maxsize <= 0:
            # Caching disabled; there is nothing to evict and nothing to keep.
            return

        if len(self.cache) >= self.maxsize:
            # Drop the entry at the front, i.e. the least recently used one.
            self.cache.popitem(last=False)
            # Ask the CUDA allocator to release cached blocks when a GPU is
            # present, so memory of the evicted model can be returned.
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
        self.cache[key] = value

    def __contains__(self, key):
        """Return whether ``key`` is cached (does not affect recency)."""
        return key in self.cache

    def __getitem__(self, key):
        """Return the value for ``key`` and mark it as most recently used.

        Raises:
            KeyError: If ``key`` is not in the cache.
        """
        # Membership is tested directly so that a stored ``None`` value is
        # returned instead of being mistaken for a missing key.
        if key not in self.cache:
            raise KeyError(key)
        self.cache.move_to_end(key)
        return self.cache[key]

    def __setitem__(self, key, value):
        """Store ``value`` under ``key``; equivalent to ``put(key, value)``."""
        self.put(key, value)

    def clear(self):
        """Remove every entry and release cached CUDA memory if a GPU is present."""
        self.cache.clear()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

    def size(self):
        """Return the number of entries currently cached."""
        return len(self.cache)
+# Создаем LRU кэш с максимальным размером 5 моделей
+# Можно изменить это значение в зависимости от доступной памяти
+model_cache = LRUCache(maxsize=5)
 # История выполнения моделей
 history = []
 MAX_HISTORY_SIZE = 50
 def get_pipeline(task, model_name, **kwargs):
+    """Загрузка pipeline с LRU кэшированием"""
     cache_key = f"{task}_{model_name}"
+    cached_model = model_cache.get(cache_key)
+    if cached_model is None:
         try:
+            cached_model = pipeline(task, model=model_name, **kwargs)
+            model_cache.put(cache_key, cached_model)
         except Exception as e:
             raise Exception(f"Ошибка загрузки модели: {str(e)}")
+    return cached_model
 def measure_time_and_save(task_name):
     """Декоратор для измерения времени выполнения и сохранения в историю"""
     try:
         classifier = get_pipeline("audio-classification", model_name)
         result = classifier(audio)
+        # audio-classification pipeline возвращает список словарей
+        if not isinstance(result, list):
+            result = [result]
         output = "Результаты классификации:\n"
         for item in result[:5]:
             output += f"{item['label']}: {item['score']:.4f}\n"
         # Используем CLAP для zero-shot классификации аудио
         from transformers import ClapProcessor, ClapModel
         cache_key = f"audio_zero_shot_{model_name}"
+        cached = model_cache.get(cache_key)
+        if cached is None:
             processor = ClapProcessor.from_pretrained(model_name)
             model = ClapModel.from_pretrained(model_name)
+            cached = (processor, model)
+            model_cache.put(cache_key, cached)
+        processor, model = cached
         labels = [label.strip() for label in candidate_labels.split(",")]
         inputs = processor(text=labels, audios=audio, return_tensors="pt", padding=True)
             from datasets import load_dataset
             cache_key = f"tts_{model_name}"
+            cached = model_cache.get(cache_key)
+            if cached is None:
                 processor = SpeechT5Processor.from_pretrained(model_name)
                 model = SpeechT5ForTextToSpeech.from_pretrained(model_name)
                 vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
                 embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
                 speaker_embeddings = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)
+                cached = (processor, model, vocoder, speaker_embeddings)
+                model_cache.put(cache_key, cached)
+            processor, model, vocoder, speaker_embeddings = cached
             inputs = processor(text=text, return_tensors="pt")
             with torch.no_grad():
                 speech = model.generate_speech(inputs["input_ids"], speaker_embeddings, vocoder=vocoder)
     """Сопоставление изображения и текста"""
     try:
         cache_key = f"clip_{model_name}"
+        cached = model_cache.get(cache_key)
+        if cached is None:
             processor = CLIPProcessor.from_pretrained(model_name)
             model = CLIPModel.from_pretrained(model_name)
+            cached = (processor, model)
+            model_cache.put(cache_key, cached)
+        processor, model = cached
         inputs = processor(text=[text], images=image, return_tensors="pt", padding=True)
         with torch.no_grad():
     try:
         if "blip" in model_name.lower():
             cache_key = f"caption_blip_{model_name}"
+            cached = model_cache.get(cache_key)
+            if cached is None:
                 processor = BlipProcessor.from_pretrained(model_name)
                 model = BlipForConditionalGeneration.from_pretrained(model_name)
+                cached = (processor, model)
+                model_cache.put(cache_key, cached)
+            processor, model = cached
             inputs = processor(image, return_tensors="pt")
             out = model.generate(**inputs, max_length=50)
             caption = processor.decode(out[0], skip_special_tokens=True)
     try:
         if "vilt" in model_name.lower():
             cache_key = f"vqa_vilt_{model_name}"
+            cached = model_cache.get(cache_key)
+            if cached is None:
                 processor = ViltProcessor.from_pretrained(model_name)
                 model = ViltForQuestionAnswering.from_pretrained(model_name)
+                cached = (processor, model)
+                model_cache.put(cache_key, cached)
+            processor, model = cached
             inputs = processor(image, question, return_tensors="pt")
             outputs = model(**inputs)
             logits = outputs.logits
             return f"Ответ: {answer}"
         elif "blip" in model_name.lower():
             cache_key = f"vqa_blip_{model_name}"
+            cached = model_cache.get(cache_key)
+            if cached is None:
                 processor = BlipProcessor.from_pretrained(model_name)
                 model = BlipForConditionalGeneration.from_pretrained(model_name)
+                cached = (processor, model)
+                model_cache.put(cache_key, cached)
+            processor, model = cached
             inputs = processor(image, question, return_tensors="pt")
             out = model.generate(**inputs, max_length=50)
             answer = processor.decode(out[0], skip_special_tokens=True)
     """Zero-shot классификация изображений"""
     try:
         cache_key = f"clip_zs_{model_name}"
+        cached = model_cache.get(cache_key)
+        if cached is None:
             processor = CLIPProcessor.from_pretrained(model_name)
             model = CLIPModel.from_pretrained(model_name)
+            cached = (processor, model)
+            model_cache.put(cache_key, cached)
+        processor, model = cached
         labels = [label.strip() for label in candidate_labels.split(",")]
         inputs = processor(text=labels, images=image, return_tensors="pt", padding=True)