Spaces:
Sleeping
Sleeping
Commit
·
e4b559c
1
Parent(s):
e0b9d86
123
Browse files
- app.py +19 -3
- requirements.txt +1 -0
app.py
CHANGED
|
@@ -145,7 +145,13 @@ def image_captioning(image):
|
|
| 145 |
|
| 146 |
def visual_question_answering(image, question):
|
| 147 |
vqa_pipeline = load_image_model("vqa")
|
| 148 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 149 |
return f"{result[0]['answer']} (confidence: {result[0]['score']:.3f})"
|
| 150 |
|
| 151 |
def zero_shot_classification(image, classes):
|
|
@@ -175,7 +181,17 @@ def image_retrieval(images, query):
|
|
| 175 |
processor = models["clip_processor"]
|
| 176 |
|
| 177 |
# Обрабатываем все изображения
|
| 178 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 179 |
with torch.no_grad():
|
| 180 |
image_embeddings = model.get_image_features(**image_inputs)
|
| 181 |
image_embeddings = image_embeddings / image_embeddings.norm(dim=-1, keepdim=True)
|
|
@@ -193,7 +209,7 @@ def image_retrieval(images, query):
|
|
| 193 |
best_idx = similarities.argmax().item()
|
| 194 |
best_score = similarities[best_idx].item()
|
| 195 |
|
| 196 |
-
return f"Лучшее изображение: #{best_idx + 1} (схожесть: {best_score:.4f})",
|
| 197 |
|
| 198 |
# Создаем интерфейс Gradio
|
| 199 |
with gr.Blocks(title="Multimodal AI Demo", theme=gr.themes.Soft()) as demo:
|
|
|
|
| 145 |
|
| 146 |
def visual_question_answering(image, question):
|
| 147 |
vqa_pipeline = load_image_model("vqa")
|
| 148 |
+
cleaned_question = (question or "").strip()
|
| 149 |
+
result = vqa_pipeline(
|
| 150 |
+
image=image,
|
| 151 |
+
question=cleaned_question,
|
| 152 |
+
truncation=True, # keep text within ViLT max sequence length (40)
|
| 153 |
+
max_length=40,
|
| 154 |
+
)
|
| 155 |
return f"{result[0]['answer']} (confidence: {result[0]['score']:.3f})"
|
| 156 |
|
| 157 |
def zero_shot_classification(image, classes):
|
|
|
|
| 181 |
processor = models["clip_processor"]
|
| 182 |
|
| 183 |
# Обрабатываем все изображения
|
| 184 |
+
if isinstance(images, tuple):
|
| 185 |
+
images = list(images)
|
| 186 |
+
normalized_images = []
|
| 187 |
+
for item in images:
|
| 188 |
+
# Gallery может вернуть (image, caption); берем только картинку
|
| 189 |
+
if isinstance(item, (list, tuple)) and item:
|
| 190 |
+
normalized_images.append(item[0])
|
| 191 |
+
else:
|
| 192 |
+
normalized_images.append(item)
|
| 193 |
+
|
| 194 |
+
image_inputs = processor(images=normalized_images, return_tensors="pt", padding=True)
|
| 195 |
with torch.no_grad():
|
| 196 |
image_embeddings = model.get_image_features(**image_inputs)
|
| 197 |
image_embeddings = image_embeddings / image_embeddings.norm(dim=-1, keepdim=True)
|
|
|
|
| 209 |
best_idx = similarities.argmax().item()
|
| 210 |
best_score = similarities[best_idx].item()
|
| 211 |
|
| 212 |
+
return f"Лучшее изображение: #{best_idx + 1} (схожесть: {best_score:.4f})", normalized_images[best_idx]
|
| 213 |
|
| 214 |
# Создаем интерфейс Gradio
|
| 215 |
with gr.Blocks(title="Multimodal AI Demo", theme=gr.themes.Soft()) as demo:
|
requirements.txt
CHANGED
|
@@ -10,3 +10,4 @@ sentence-transformers>=2.2.0
|
|
| 10 |
librosa>=0.10.0
|
| 11 |
requests>=2.28.0
|
| 12 |
accelerate>=0.20.0
|
|
|
|
|
|
| 10 |
librosa>=0.10.0
|
| 11 |
requests>=2.28.0
|
| 12 |
accelerate>=0.20.0
|
| 13 |
+
timm>=0.6.12
|