Spaces:

aiivar
/

Transformers

Sleeping

App Files Files Community

MinAA committed on Jan 8

Commit

8dc8851

1 Parent(s): 0c72405

init

Browse files

Files changed (1) hide show

app.py +279 -44

app.py CHANGED Viewed

@@ -6,7 +6,8 @@ from transformers import (
 )
 import torch
 from torch.nn.functional import cosine_similarity
-from PIL import Image
 import functools
 import warnings
 import time
@@ -134,8 +135,15 @@ def measure_time_and_save(task_name):
                 if isinstance(result, str):
                     output = result
                 elif isinstance(result, tuple) and len(result) == 2:
-                    # Аудио результат (sample_rate, audio_data)
-                    output = f"Аудио файл сгенерирован (sample_rate: {result[0]})"
                 else:
                     output = str(result)[:500]
             except Exception as e:
@@ -360,6 +368,8 @@ def speech_recognition(audio, model_name):
 def speech_synthesis(text, model_name):
     """Синтез речи"""
     try:
         if "speecht5" in model_name.lower():
             from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
             import torch
@@ -381,17 +391,85 @@ def speech_synthesis(text, model_name):
             with torch.no_grad():
                 speech = model.generate_speech(inputs["input_ids"], speaker_embeddings, vocoder=vocoder)
-            import numpy as np
             audio_data = speech.numpy()
             sample_rate = 16000
             return (sample_rate, audio_data)
         else:
             # Для других моделей используем стандартный pipeline
             tts = get_pipeline("text-to-speech", model_name)
             result = tts(text)
-            return result
     except Exception as e:
-        return f"Ошибка: {str(e)}"
 # ==================== ЗАДАЧИ С ИЗОБРАЖЕНИЯМИ ====================
@@ -401,12 +479,91 @@ def object_detection(image, model_name):
     try:
         detector = get_pipeline("object-detection", model_name)
         result = detector(image)
         output = "Обнаруженные объекты:\n"
-        for item in result:
-            output += f"{item['label']}: уверенность {item['score']:.4f}, координаты {item['box']}\n"
-        return output
     except Exception as e:
-        return f"Ошибка: {str(e)}"
 @measure_time_and_save("Сегментация изображений")
 def image_segmentation(image, model_name):
@@ -414,12 +571,88 @@ def image_segmentation(image, model_name):
     try:
         segmenter = get_pipeline("image-segmentation", model_name)
         result = segmenter(image)
         output = "Сегменты:\n"
-        for item in result:
-            output += f"{item['label']}: уверенность {item['score']:.4f}\n"
-        return output
     except Exception as e:
-        return f"Ошибка: {str(e)}"
 @measure_time_and_save("Сопоставление изображение-текст")
 def image_text_matching(image, text, model_name):
@@ -711,8 +944,8 @@ with gr.Blocks(title="Трансформеры Hugging Face", theme=gr.themes.So
                     with gr.Column():
                         text_classifier_input = gr.Textbox(
                             label="Введите текст для классификации",
-                            placeholder="Я люблю это приложение!",
-                            value="Я люблю это приложение!"
                         )
                         text_classifier_model = gr.Dropdown(
                             choices=[
@@ -737,13 +970,13 @@ with gr.Blocks(title="Трансформеры Hugging Face", theme=gr.themes.So
                     with gr.Column():
                         zs_text_input = gr.Textbox(
                             label="Введите текст",
-                            placeholder="Я только что закончил читать отличную книгу",
-                            value="Я только что закончил читать отличную книгу"
                         )
                         zs_text_labels = gr.Textbox(
                             label="Кандидаты (через запятую)",
-                            placeholder="положительный, отрицательный, нейтральный",
-                            value="положительный, отрицательный, нейтральный"
                         )
                         zs_text_model = gr.Dropdown(
                             choices=[
@@ -768,8 +1001,8 @@ with gr.Blocks(title="Трансформеры Hugging Face", theme=gr.themes.So
                     with gr.Column():
                         text_gen_input = gr.Textbox(
                             label="Промпт",
-                            placeholder="В далеком будущем",
-                            value="В далеком будущем"
                         )
                         text_gen_length = gr.Slider(20, 200, value=50, step=10, label="Максимальная длина")
                         text_gen_model = gr.Dropdown(
@@ -791,8 +1024,8 @@ with gr.Blocks(title="Трансформеры Hugging Face", theme=gr.themes.So
                     with gr.Column():
                         unmasker_input = gr.Textbox(
                             label="Текст с [MASK]",
-                            placeholder="Я люблю [MASK] программирование",
-                            value="Я люблю [MASK] программирование"
                         )
                         unmasker_model = gr.Dropdown(
                             choices=[
@@ -817,8 +1050,8 @@ with gr.Blocks(title="Трансформеры Hugging Face", theme=gr.themes.So
                     with gr.Column():
                         ner_input = gr.Textbox(
                             label="Введите текст",
-                            placeholder="Меня зовут Иван, я работаю в компании Microsoft в Москве",
-                            value="Меня зовут Иван, я работаю в компании Microsoft в Москве"
                         )
                         ner_model = gr.Dropdown(
                             choices=[
@@ -843,13 +1076,13 @@ with gr.Blocks(title="Трансформеры Hugging Face", theme=gr.themes.So
                     with gr.Column():
                         qa_question = gr.Textbox(
                             label="Вопрос",
-                            placeholder="Какой цвет у неба?",
-                            value="Какой цвет у неба?"
                         )
                         qa_context = gr.Textbox(
                             label="Контекст",
-                            placeholder="Небо имеет синий цвет из-за рассеяния света",
-                            value="Небо имеет синий цвет из-за рассеяния света в атмосфере",
                             lines=3
                         )
                         qa_model = gr.Dropdown(
@@ -875,8 +1108,8 @@ with gr.Blocks(title="Трансформеры Hugging Face", theme=gr.themes.So
                     with gr.Column():
                         summarizer_input = gr.Textbox(
                             label="Текст для суммаризации",
-                            placeholder="Введите длинный текст...",
-                            value="Искусственный интеллект - это область компьютерных наук, которая занимается созданием интеллектуальных машин. Машинное обучение является подмножеством искусственного интеллекта, которое позволяет системам автоматически учиться и улучшаться на основе опыта.",
                             lines=5
                         )
                         summarizer_max = gr.Slider(20, 200, value=50, step=10, label="Максимальная длина")
@@ -996,8 +1229,8 @@ with gr.Blocks(title="Трансформеры Hugging Face", theme=gr.themes.So
                         )
                         zs_audio_labels = gr.Textbox(
                             label="Кандидаты (через запятую)",
-                            placeholder="музыка, речь, шум",
-                            value="музыка, речь, шум"
                         )
                         zs_audio_model = gr.Dropdown(
                             choices=["laion/clap-htsat-unfused"],
@@ -1043,8 +1276,8 @@ with gr.Blocks(title="Трансформеры Hugging Face", theme=gr.themes.So
                     with gr.Column():
                         tts_input = gr.Textbox(
                             label="Введите текст для синтеза",
-                            placeholder="Привет, это тест синтеза речи",
-                            value="Привет, это тест синтеза речи",
                             lines=3
                         )
                         tts_model = gr.Dropdown(
@@ -1088,10 +1321,11 @@ with gr.Blocks(title="Трансформеры Hugging Face", theme=gr.themes.So
                         obj_det_btn = gr.Button("Обнаружить объекты", variant="primary")
                     with gr.Column():
                         obj_det_output = gr.Textbox(label="Результат", lines=8)
                 obj_det_btn.click(
                     object_detection,
                     inputs=[obj_det_input, obj_det_model],
-                    outputs=obj_det_output
                 )
             with gr.Accordion("Сегментация изображений", open=False):
@@ -1113,10 +1347,11 @@ with gr.Blocks(title="Трансформеры Hugging Face", theme=gr.themes.So
                         seg_btn = gr.Button("Сегментировать", variant="primary")
                     with gr.Column():
                         seg_output = gr.Textbox(label="Результат", lines=8)
                 seg_btn.click(
                     image_segmentation,
                     inputs=[seg_input, seg_model],
-                    outputs=seg_output
                 )
             with gr.Accordion("Сопоставление изображение-текст", open=False):
@@ -1128,8 +1363,8 @@ with gr.Blocks(title="Трансформеры Hugging Face", theme=gr.themes.So
                         )
                         itm_text = gr.Textbox(
                             label="Введите текст",
-                            placeholder="кот сидит на траве",
-                            value="кот сидит на траве"
                         )
                         itm_model = gr.Dropdown(
                             choices=[
@@ -1183,8 +1418,8 @@ with gr.Blocks(title="Трансформеры Hugging Face", theme=gr.themes.So
                         )
                         vqa_question = gr.Textbox(
                             label="Вопрос",
-                            placeholder="Что находится на изображении?",
-                            value="Что находится на изображении?"
                         )
                         vqa_model = gr.Dropdown(
                             choices=[
@@ -1213,8 +1448,8 @@ with gr.Blocks(title="Трансформеры Hugging Face", theme=gr.themes.So
                         )
                         zs_image_labels = gr.Textbox(
                             label="Кандидаты (через запятую)",
-                            placeholder="кот, собака, птица",
-                            value="кот, собака, птица"
                         )
                         zs_image_model = gr.Dropdown(
                             choices=[

 )
 import torch
 from torch.nn.functional import cosine_similarity
+from PIL import Image, ImageDraw, ImageFont
+import numpy as np
 import functools
 import warnings
 import time
                 if isinstance(result, str):
                     output = result
                 elif isinstance(result, tuple) and len(result) == 2:
+                    # Проверяем тип второго элемента
+                    if isinstance(result[1], Image.Image):
+                        # Результат с изображением (текст, изображение)
+                        output = result[0] if isinstance(result[0], str) else str(result[0])[:500]
+                    elif isinstance(result[1], (tuple, list)) and len(result[1]) == 2:
+                        # Аудио результат (sample_rate, audio_data)
+                        output = f"Аудио файл сгенерирован (sample_rate: {result[0]})"
+                    else:
+                        output = str(result)[:500]
                 else:
                     output = str(result)[:500]
             except Exception as e:
 def speech_synthesis(text, model_name):
     """Synthesize speech from text.

     Returns a (sample_rate, audio_data) tuple. audio_data is a NumPy array
     that is flattened when it has more than one dimension, cast to float32,
     and divided by its peak only when the absolute maximum exceeds 1.0.
     The SpeechT5 branch reports a fixed sample rate of 16000; the generic
     pipeline branch takes the rate from the pipeline result (22050 when a
     dict result carries no rate key).

     Raises:
         Exception: every failure is re-raised with the message prefix
             "Ошибка синтеза речи: ".
     """
     try:
+        # NOTE(review): redundant with the module-level `import numpy as np` added in this same commit.
+        import numpy as np
         if "speecht5" in model_name.lower():
             from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
             # NOTE(review): this function-scope import makes `torch` a local name for the
             # whole function, so the `torch.Tensor` checks in the non-SpeechT5 branch below
             # would raise UnboundLocalError; the module already imports torch at top level.
             import torch
             # NOTE(review): the statements that create model, inputs, speaker_embeddings
             # and vocoder are not visible in this diff hunk.
             with torch.no_grad():
                 speech = model.generate_speech(inputs["input_ids"], speaker_embeddings, vocoder=vocoder)
+            # Convert to NumPy and normalize
             audio_data = speech.numpy()
+            # Make sure this is a 1-D array
+            # NOTE(review): flatten() concatenates rows; if a 2-D result were ever
+            # multi-channel this would not mix the channels. TODO confirm shapes.
+            if len(audio_data.shape) > 1:
+                audio_data = audio_data.flatten()
+            # Cast to float32 when the dtype differs
+            if audio_data.dtype != np.float32:
+                audio_data = audio_data.astype(np.float32)
+            # Scale by the peak only when values fall outside [-1, 1]
+            max_val = np.abs(audio_data).max()
+            if max_val > 1.0:
+                audio_data = audio_data / max_val
             sample_rate = 16000
             # NOTE(review): the history wrapper changed in this commit recognises audio via
             # isinstance(result[1], (tuple, list)); audio_data is an ndarray, so that check
             # would not match if this function is wrapped by it. TODO confirm.
             return (sample_rate, audio_data)
         else:
             # For other models use the standard pipeline
             tts = get_pipeline("text-to-speech", model_name)
             result = tts(text)
+            # The pipeline may return a dict or a tuple
+            if isinstance(result, dict):
+                # Standard format: {"audio": array, "sampling_rate": int};
+                # "raw" and "sample_rate" are accepted as fallback keys.
+                audio_data = result.get("audio", result.get("raw", None))
+                sample_rate = result.get("sampling_rate", result.get("sample_rate", 22050))
+                if audio_data is None:
+                    raise ValueError("Не удалось извлечь аудио данные из результата pipeline")
+                # Convert to a NumPy array if needed
+                if isinstance(audio_data, torch.Tensor):
+                    audio_data = audio_data.numpy()
+                elif not isinstance(audio_data, np.ndarray):
+                    audio_data = np.array(audio_data)
+                # Make sure this is a 1-D array
+                if len(audio_data.shape) > 1:
+                    audio_data = audio_data.flatten()
+                # Cast to float32
+                if audio_data.dtype != np.float32:
+                    audio_data = audio_data.astype(np.float32)
+                # Scale by the peak only when values fall outside [-1, 1]
+                max_val = np.abs(audio_data).max()
+                if max_val > 1.0:
+                    audio_data = audio_data / max_val
+                return (sample_rate, audio_data)
+            elif isinstance(result, tuple) and len(result) == 2:
+                # Already shaped as (sample_rate, audio_data)
+                sample_rate, audio_data = result
+                # Convert to NumPy if needed
+                # NOTE(review): the steps below repeat the dict branch verbatim.
+                if isinstance(audio_data, torch.Tensor):
+                    audio_data = audio_data.numpy()
+                elif not isinstance(audio_data, np.ndarray):
+                    audio_data = np.array(audio_data)
+                # Make sure this is a 1-D array
+                if len(audio_data.shape) > 1:
+                    audio_data = audio_data.flatten()
+                # Cast to float32
+                if audio_data.dtype != np.float32:
+                    audio_data = audio_data.astype(np.float32)
+                # Scale by the peak only when values fall outside [-1, 1]
+                max_val = np.abs(audio_data).max()
+                if max_val > 1.0:
+                    audio_data = audio_data / max_val
+                return (sample_rate, audio_data)
+            else:
+                raise ValueError(f"Неожиданный формат результата от pipeline: {type(result)}")
     except Exception as e:
+        # Re-raise every failure as a generic Exception with a prefixed message
+        # (nothing is returned on error). Presumably the history decorator records
+        # the error; its except body is not visible here.
+        # NOTE(review): no `from e`, so the original error is attached only as implicit context.
+        raise Exception(f"Ошибка синтеза речи: {str(e)}")
 # ==================== ЗАДАЧИ С ИЗОБРАЖЕНИЯМИ ====================
     # Body of object_detection(image, model_name); the def line is outside this diff hunk.
     # Runs the detection pipeline, draws each usable box with a label onto a copy of
     # the image, and returns (text report, annotated image). On any exception it
     # returns ("Ошибка: <message>", original image).
     try:
         detector = get_pipeline("object-detection", model_name)
         result = detector(image)
+        # Work on a copy of the image for the visualisation
+        img_with_boxes = image.copy()
+        draw = ImageDraw.Draw(img_with_boxes)
+        # Colours for the boxes, reused cyclically by detection index
+        colors = ['red', 'blue', 'green', 'yellow', 'orange', 'purple', 'cyan', 'magenta']
         output = "Обнаруженные объекты:\n"
+        for i, item in enumerate(result):
+            box = item['box']
+            label = item['label']
+            score = item['score']
+            # Handle the different coordinate formats
+            if isinstance(box, dict):
+                # Dict with keys 'xmin', 'ymin', 'xmax', 'ymax'
+                # (falls back to 'x1'/'y1'/'x2'/'y2', then to 0)
+                xmin = box.get('xmin', box.get('x1', 0))
+                ymin = box.get('ymin', box.get('y1', 0))
+                xmax = box.get('xmax', box.get('x2', 0))
+                ymax = box.get('ymax', box.get('y2', 0))
+            elif isinstance(box, (list, tuple)) and len(box) >= 4:
+                # List [xmin, ymin, xmax, ymax] or [xcenter, ycenter, width, height]
+                # NOTE(review): this heuristic is ambiguous: a centre-format box whose
+                # width exceeds xcenter and height exceeds ycenter is read as corners.
+                if box[2] > box[0] and box[3] > box[1]:
+                    # Probably [xmin, ymin, xmax, ymax]
+                    xmin, ymin, xmax, ymax = box[0], box[1], box[2], box[3]
+                else:
+                    # Probably [xcenter, ycenter, width, height]
+                    xcenter, ycenter, width, height = box[0], box[1], box[2], box[3]
+                    xmin = xcenter - width / 2
+                    ymin = ycenter - height / 2
+                    xmax = xcenter + width / 2
+                    ymax = ycenter + height / 2
+            else:
+                # Unknown format: report it in the text output but draw nothing
+                output += f"{label}: уверенность {score:.4f}, координаты {box}\n"
+                continue
+            # Clamp the coordinates to the image bounds
+            img_width, img_height = img_with_boxes.size
+            xmin = max(0, min(xmin, img_width))
+            ymin = max(0, min(ymin, img_height))
+            xmax = max(0, min(xmax, img_width))
+            ymax = max(0, min(ymax, img_height))
+            # Skip drawing when the clamped box has no area
+            if xmax <= xmin or ymax <= ymin:
+                output += f"{label}: уверенность {score:.4f}, координаты {box} (некорректные)\n"
+                continue
+            # Draw the rectangle
+            color = colors[i % len(colors)]
+            draw.rectangle([xmin, ymin, xmax, ymax], outline=color, width=3)
+            # Add a caption with the label and confidence
+            text = f"{label}: {score:.2f}"
+            # NOTE(review): the font is loaded again for every box, the path is a
+            # macOS-style system location (other hosts fall through to the default
+            # font), and the bare `except:` clauses also swallow KeyboardInterrupt.
+            try:
+                # Try to use a system font
+                font = ImageFont.truetype("/System/Library/Fonts/Helvetica.ttc", 16)
+            except:
+                try:
+                    font = ImageFont.load_default()
+                except:
+                    font = None
+            # Measure the caption
+            if font:
+                bbox = draw.textbbox((0, 0), text, font=font)
+                text_width = bbox[2] - bbox[0]
+                text_height = bbox[3] - bbox[1]
+            else:
+                # Rough estimate when no font object is available
+                text_width = len(text) * 6
+                text_height = 12
+            # Draw the caption background above the box, kept inside the image
+            text_y = max(0, ymin - text_height - 4)
+            text_x_end = min(img_width, xmin + text_width + 4)
+            draw.rectangle([xmin, text_y, text_x_end, ymin], fill=color)
+            draw.text((xmin + 2, text_y + 2), text, fill='white', font=font)
+            output += f"{label}: уверенность {score:.4f}, координаты {box}\n"
+        return output, img_with_boxes
     except Exception as e:
+        # Keep the (text, image) return shape on failure by handing back the input image
+        return f"Ошибка: {str(e)}", image
 @measure_time_and_save("Сегментация изображений")
 def image_segmentation(image, model_name):
     """Segment an image and return a text summary plus a colour overlay.

     Args:
         image: Input picture; it must provide the PIL ``Image`` API
             (``copy``, ``convert``, ``size``).
         model_name: Model identifier forwarded to
             ``get_pipeline("image-segmentation", ...)``.

     Returns:
         A ``(text, image)`` tuple. ``text`` lists every segment label,
         followed by its confidence when the pipeline reports one.
         ``image`` is an RGB copy of the input with every segment mask
         blended in as a semi-transparent colour. If anything raises, the
         tuple is ``("Ошибка: <message>", image)`` with the untouched input.
     """
     try:
         segmenter = get_pipeline("image-segmentation", model_name)
         result = segmenter(image)

         base = image.copy().convert("RGBA")
         overlay = Image.new("RGBA", base.size, (0, 0, 0, 0))
         # A private seeded generator keeps the colours reproducible without
         # reseeding NumPy's global RNG for the rest of the application.
         rng = np.random.default_rng(42)

         output = "Сегменты:\n"
         for item in result:
             label = item['label']
             # The pipeline documents ``score`` as optional; it can be None,
             # which the ``:.4f`` format spec cannot render.
             score = item.get('score')
             rgb = tuple(int(channel) for channel in rng.integers(0, 256, size=3))

             mask = item.get('mask')
             if mask is not None:
                 mask_array = np.asarray(mask)
                 if mask_array.ndim == 3:
                     # Multi-channel mask: use the first channel as the mask.
                     mask_array = mask_array[:, :, 0]
                 if mask_array.ndim == 2:
                     # Non-zero pixels belong to the segment; 128 is ~50% alpha.
                     alpha = Image.fromarray((mask_array > 0).astype(np.uint8) * 128)
                     if alpha.size != base.size:
                         # NEAREST keeps the mask edge hard instead of smearing it.
                         alpha = alpha.resize(base.size, Image.Resampling.NEAREST)
                     layer = Image.new("RGBA", base.size, rgb + (0,))
                     layer.putalpha(alpha)
                     overlay = Image.alpha_composite(overlay, layer)

             if score is None:
                 output += f"{label}\n"
             else:
                 output += f"{label}: уверенность {score:.4f}\n"

         # Blend the overlay onto the picture and drop alpha for display.
         blended = Image.alpha_composite(base, overlay).convert("RGB")
         return output, blended
     except Exception as e:
         return f"Ошибка: {str(e)}", image
 @measure_time_and_save("Сопоставление изображение-текст")
 def image_text_matching(image, text, model_name):
                     with gr.Column():
                         text_classifier_input = gr.Textbox(
                             label="Введите текст для классификации",
+                            placeholder="I love this app!",
+                            value="I love this app!"
                         )
                         text_classifier_model = gr.Dropdown(
                             choices=[
                     with gr.Column():
                         zs_text_input = gr.Textbox(
                             label="Введите текст",
+                            placeholder="I just finished reading a great book",
+                            value="I just finished reading a great book"
                         )
                         zs_text_labels = gr.Textbox(
                             label="Кандидаты (через запятую)",
+                            placeholder="positive, negative, neutral",
+                            value="positive, negative, neutral"
                         )
                         zs_text_model = gr.Dropdown(
                             choices=[
                     with gr.Column():
                         text_gen_input = gr.Textbox(
                             label="Промпт",
+                            placeholder="In the distant future",
+                            value="In the distant future"
                         )
                         text_gen_length = gr.Slider(20, 200, value=50, step=10, label="Максимальная длина")
                         text_gen_model = gr.Dropdown(
                     with gr.Column():
                         unmasker_input = gr.Textbox(
                             label="Текст с [MASK]",
+                            placeholder="I love [MASK] programming",
+                            value="I love [MASK] programming"
                         )
                         unmasker_model = gr.Dropdown(
                             choices=[
                     with gr.Column():
                         ner_input = gr.Textbox(
                             label="Введите текст",
+                            placeholder="My name is John, I work at Microsoft in Seattle",
+                            value="My name is John, I work at Microsoft in Seattle"
                         )
                         ner_model = gr.Dropdown(
                             choices=[
                     with gr.Column():
                         qa_question = gr.Textbox(
                             label="Вопрос",
+                            placeholder="What color is the sky?",
+                            value="What color is the sky?"
                         )
                         qa_context = gr.Textbox(
                             label="Контекст",
+                            placeholder="The sky is blue due to light scattering",
+                            value="The sky is blue due to light scattering in the atmosphere",
                             lines=3
                         )
                         qa_model = gr.Dropdown(
                     with gr.Column():
                         summarizer_input = gr.Textbox(
                             label="Текст для суммаризации",
+                            placeholder="Enter a long text...",
+                            value="Artificial intelligence is a field of computer science that focuses on creating intelligent machines. Machine learning is a subset of artificial intelligence that enables systems to automatically learn and improve from experience.",
                             lines=5
                         )
                         summarizer_max = gr.Slider(20, 200, value=50, step=10, label="Максимальная длина")
                         )
                         zs_audio_labels = gr.Textbox(
                             label="Кандидаты (через запятую)",
+                            placeholder="music, speech, noise",
+                            value="music, speech, noise"
                         )
                         zs_audio_model = gr.Dropdown(
                             choices=["laion/clap-htsat-unfused"],
                     with gr.Column():
                         tts_input = gr.Textbox(
                             label="Введите текст для синтеза",
+                            placeholder="Hello, this is a speech synthesis test",
+                            value="Hello, this is a speech synthesis test",
                             lines=3
                         )
                         tts_model = gr.Dropdown(
                         obj_det_btn = gr.Button("Обнаружить объекты", variant="primary")
                     with gr.Column():
                         obj_det_output = gr.Textbox(label="Результат", lines=8)
+                        obj_det_image = gr.Image(label="Изображение с результатами", type="pil")
                 obj_det_btn.click(
                     object_detection,
                     inputs=[obj_det_input, obj_det_model],
+                    outputs=[obj_det_output, obj_det_image]
                 )
             with gr.Accordion("Сегментация изображений", open=False):
                         seg_btn = gr.Button("Сегментировать", variant="primary")
                     with gr.Column():
                         seg_output = gr.Textbox(label="Результат", lines=8)
+                        seg_image = gr.Image(label="Изображение с результатами", type="pil")
                 seg_btn.click(
                     image_segmentation,
                     inputs=[seg_input, seg_model],
+                    outputs=[seg_output, seg_image]
                 )
             with gr.Accordion("Сопоставление изображение-текст", open=False):
                         )
                         itm_text = gr.Textbox(
                             label="Введите текст",
+                            placeholder="a cat sitting on the grass",
+                            value="a cat sitting on the grass"
                         )
                         itm_model = gr.Dropdown(
                             choices=[
                         )
                         vqa_question = gr.Textbox(
                             label="Вопрос",
+                            placeholder="What is in the image?",
+                            value="What is in the image?"
                         )
                         vqa_model = gr.Dropdown(
                             choices=[
                         )
                         zs_image_labels = gr.Textbox(
                             label="Кандидаты (через запятую)",
+                            placeholder="cat, dog, bird",
+                            value="cat, dog, bird"
                         )
                         zs_image_model = gr.Dropdown(
                             choices=[