Spaces:
Sleeping
Sleeping
Update inference.py
Browse files - inference.py +22 -7
inference.py
CHANGED
|
@@ -6,6 +6,7 @@ import faiss
|
|
| 6 |
import numpy as np
|
| 7 |
import json
|
| 8 |
import spacy
|
|
|
|
| 9 |
|
| 10 |
# Load models and resources
|
| 11 |
clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
|
|
@@ -112,10 +113,24 @@ def generate_rag_caption(image):
|
|
| 112 |
Input: PIL Image or image path (str).
|
| 113 |
Output: Caption (str).
|
| 114 |
"""
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
import numpy as np
|
| 7 |
import json
|
| 8 |
import spacy
|
| 9 |
+
import time
|
| 10 |
|
| 11 |
# Load models and resources
|
| 12 |
clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
|
|
|
|
| 113 |
Input: PIL Image or image path (str).
|
| 114 |
Output: Caption (str).
|
| 115 |
"""
|
| 116 |
+
try:
|
| 117 |
+
start = time.time()
|
| 118 |
+
embedding = extract_image_features(image)
|
| 119 |
+
print("⏱️ CLIP feature extraction:", round(time.time() - start, 2), "s")
|
| 120 |
+
if embedding is None:
|
| 121 |
+
return "Failed to process image."
|
| 122 |
+
|
| 123 |
+
start = time.time()
|
| 124 |
+
retrieved = retrieve_similar_captions(embedding, k=5)
|
| 125 |
+
print("⏱️ Caption retrieval:", round(time.time() - start, 2), "s")
|
| 126 |
+
if not retrieved:
|
| 127 |
+
return "No similar captions found."
|
| 128 |
+
|
| 129 |
+
start = time.time()
|
| 130 |
+
caption = generate_caption_from_retrieved(retrieved)
|
| 131 |
+
print("⏱️ Caption generation:", round(time.time() - start, 2), "s")
|
| 132 |
+
|
| 133 |
+
return caption
|
| 134 |
+
except Exception as e:
|
| 135 |
+
print(f"Error in RAG captioning: {e}")
|
| 136 |
+
return "Something went wrong during caption generation."
|