Spaces:

NEXAS
/

ImageSearchClip

Build error

App Files Community

NEXAS committed on Jun 26, 2025

Commit

ee979c8

verified ·

1 Parent(s): 228d5b0

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +29 -20

src/streamlit_app.py CHANGED Viewed

@@ -10,20 +10,25 @@ from skimage import data as skdata
 from skimage.io import imsave
 import uuid
-# Paths
-DB_PATH = './data/image_vdb'
-IMAGES_DIR = './data/extracted_images'
 os.makedirs(IMAGES_DIR, exist_ok=True)
-# Init ChromaDB
-chroma_client = PersistentClient(path=DB_PATH)
-image_loader = ImageLoader()
-embedding_fn = OpenCLIPEmbeddingFunction()
-image_collection = chroma_client.get_or_create_collection(
-    name="image", embedding_function=embedding_fn, data_loader=image_loader
-)
-# === Image Handling ===
 def extract_images_from_pdf(pdf_bytes):
     pdf = fitz.open(stream=pdf_bytes, filetype="pdf")
     saved_images = []
@@ -47,26 +52,31 @@ def extract_images_from_pdf(pdf_bytes):
     return saved_images
 def index_images(image_paths):
     ids = []
     uris = []
-    for i, path in enumerate(sorted(image_paths)):
-        if path.endswith((".png", ".jpeg", ".jpg")):
             ids.append(str(uuid.uuid4()))
             uris.append(path)
     if ids:
         image_collection.add(ids=ids, uris=uris)
 def query_similar_images(image_file, top_k=5):
     with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as tmp:
         tmp.write(image_file.read())
         tmp_path = tmp.name
-    results = image_collection.query(query_uris=[tmp_path], n_results=top_k)
-    os.remove(tmp_path)
-    return results['uris'][0]
 def load_skimage_demo_images():
     demo_images = {
         "astronaut": skdata.astronaut(),
@@ -87,7 +97,6 @@ def load_skimage_demo_images():
 # === Streamlit UI ===
 st.title("🔍 Image Similarity Search from PDF or Custom Dataset")
-# Source Selector
 source = st.radio(
     "Select Image Source",
     ["Upload PDF", "Upload Images", "Load Demo Dataset"],
@@ -104,7 +113,9 @@ if source == "Upload PDF":
             st.image(images, width=150)
 elif source == "Upload Images":
-    uploaded_imgs = st.file_uploader("📤 Upload one or more images", type=["jpg", "jpeg", "png"], accept_multiple_files=True)
     if uploaded_imgs:
         saved_paths = []
         for img in uploaded_imgs:
@@ -124,10 +135,8 @@ elif source == "Load Demo Dataset":
         st.success("Demo images loaded and indexed.")
         st.image(demo_paths, width=150)
-# Divider
 st.divider()
-# Query Interface
 st.subheader("🔎 Search for Similar Images")
 query_img = st.file_uploader("Upload a query image", type=["jpg", "jpeg", "png"])
 if query_img:

 from skimage.io import imsave
 import uuid
+# Use safe temp directories for Streamlit or restricted environments
+TEMP_DIR = tempfile.gettempdir()
+IMAGES_DIR = os.path.join(TEMP_DIR, "extracted_images")
+DB_PATH = os.path.join(TEMP_DIR, "image_vdb")
 os.makedirs(IMAGES_DIR, exist_ok=True)
+@st.cache_resource
+def get_chroma_collection():
+    chroma_client = PersistentClient(path=DB_PATH)
+    image_loader = ImageLoader()
+    embedding_fn = OpenCLIPEmbeddingFunction()
+    collection = chroma_client.get_or_create_collection(
+        name="image", embedding_function=embedding_fn, data_loader=image_loader
+    )
+    return collection
+image_collection = get_chroma_collection()
+# === Image Extraction ===
 def extract_images_from_pdf(pdf_bytes):
     pdf = fitz.open(stream=pdf_bytes, filetype="pdf")
     saved_images = []
     return saved_images
+# === Indexing ===
 def index_images(image_paths):
     ids = []
     uris = []
+    for path in sorted(image_paths):
+        if path.lower().endswith((".png", ".jpeg", ".jpg")):
             ids.append(str(uuid.uuid4()))
             uris.append(path)
     if ids:
         image_collection.add(ids=ids, uris=uris)
+# === Querying ===
 def query_similar_images(image_file, top_k=5):
     with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as tmp:
         tmp.write(image_file.read())
         tmp_path = tmp.name
+    try:
+        results = image_collection.query(query_uris=[tmp_path], n_results=top_k)
+        return results['uris'][0]
+    finally:
+        os.remove(tmp_path)
+# === Demo images ===
 def load_skimage_demo_images():
     demo_images = {
         "astronaut": skdata.astronaut(),
 # === Streamlit UI ===
 st.title("🔍 Image Similarity Search from PDF or Custom Dataset")
 source = st.radio(
     "Select Image Source",
     ["Upload PDF", "Upload Images", "Load Demo Dataset"],
             st.image(images, width=150)
 elif source == "Upload Images":
+    uploaded_imgs = st.file_uploader(
+        "📤 Upload one or more images", type=["jpg", "jpeg", "png"], accept_multiple_files=True
+    )
     if uploaded_imgs:
         saved_paths = []
         for img in uploaded_imgs:
         st.success("Demo images loaded and indexed.")
         st.image(demo_paths, width=150)
 st.divider()
 st.subheader("🔎 Search for Similar Images")
 query_img = st.file_uploader("Upload a query image", type=["jpg", "jpeg", "png"])
 if query_img: