Spaces:

EYEDOL
/

AGRO

Sleeping

App Files Files Community

EYEDOL committed on Nov 16, 2025

Commit

8576118

verified ·

1 Parent(s): 73ef8da

Update app.py

Browse files

Files changed (1) hide show

app.py +38 -39

app.py CHANGED Viewed

@@ -4,83 +4,82 @@ import json
 import numpy as np
 from PIL import Image
 import torch
-import torch.nn.functional as F
 from transformers import AutoProcessor, AutoModel
-import faiss
 import gradio as gr
-# CONFIG - make sure paths match those produced by build_index.py
 MODEL_ID = "EYEDOL/siglipFULL-agri-finetuned"
-FAISS_DIR = "faiss_data"
-INDEX_FILE = os.path.join(FAISS_DIR, "texts.faiss")
-TEXTS_JSONL = os.path.join(FAISS_DIR, "texts.jsonl")
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
-TOP_K = 5
-# Load metadata texts into memory
 texts = []
-with open(TEXTS_JSONL, "r", encoding="utf-8") as f:
     for line in f:
         obj = json.loads(line.strip())
         texts.append(obj.get("text", ""))
-print(f"Loaded {len(texts)} texts.")
-# Load FAISS index
-print("Loading FAISS index...")
-index = faiss.read_index(INDEX_FILE)  # IndexFlatIP saved previously
-# If index is on CPU but you want to use GPU inference in Space, you can move to GPU if available and faiss-gpu installed.
-# Load model + processor
-print("Loading model & processor...")
 processor = AutoProcessor.from_pretrained(MODEL_ID)
 model = AutoModel.from_pretrained(MODEL_ID).to(DEVICE)
 model.eval()
-def search_image(image: Image.Image, top_k: int = TOP_K):
-    # Preprocess image
     inputs = processor(images=image.convert("RGB"), return_tensors="pt").to(DEVICE)
     with torch.no_grad():
         img_embed = model.get_image_features(**inputs)  # (1, D)
         img_embed = img_embed / img_embed.norm(p=2, dim=-1, keepdim=True)
-    img_vec = img_embed.cpu().numpy().astype('float32')  # shape (1, D)
-    # Query FAISS (index expects float32)
-    D, I = index.search(img_vec, top_k)  # D=distance matrix (inner product), I=indices
     results = []
-    for score, idx in zip(D[0], I[0]):
-        if idx < 0:
-            continue
         text = texts[idx] if idx < len(texts) else ""
-        # score is inner product cosine since vectors were normalized (range -1..1)
-        results.append({"text": text, "score": float(score)})
-    return results
-# Build Gradio UI
-def infer_and_format(file, top_k):
-    if file is None:
-        return "Upload an image", None
-    image = Image.open(file).convert("RGB")
-    results = search_image(image, top_k)
-    # build HTML or simple text output
     lines = []
-    for i, r in enumerate(results, 1):
         lines.append(f"<b>Rank {i}</b> — score: {r['score']:.4f}<br>{r['text']}")
     html = "<br><br>".join(lines)
     return html, image
 with gr.Blocks() as demo:
-    gr.Markdown("# Image → Retrieved Texts")
     with gr.Row():
         with gr.Column(scale=1):
-            img_in = gr.Image(type="filepath", label="Upload image")
-            k_slider = gr.Slider(1, 10, value=TOP_K, step=1, label="Top K")
             run_btn = gr.Button("Retrieve")
         with gr.Column(scale=1):
             out_html = gr.HTML()
             out_img = gr.Image(label="Input image (preview)")
-    run_btn.click(infer_and_format, inputs=[img_in, k_slider], outputs=[out_html, out_img])
 if __name__ == "__main__":
     demo.launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT", 7860)))

 import numpy as np
 from PIL import Image
 import torch
 from transformers import AutoProcessor, AutoModel
+from sklearn.neighbors import NearestNeighbors
 import gradio as gr
+# CONFIG
 MODEL_ID = "EYEDOL/siglipFULL-agri-finetuned"
+DATA_DIR = "faiss_free_data"
+EMBEDS_FILE = os.path.join(DATA_DIR, "text_embeds.npy")
+TEXTS_FILE = os.path.join(DATA_DIR, "texts.jsonl")
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+DEFAULT_TOPK = 5
+# ---- Load texts metadata
 texts = []
+with open(TEXTS_FILE, "r", encoding="utf-8") as f:
     for line in f:
         obj = json.loads(line.strip())
         texts.append(obj.get("text", ""))
+# ---- Load embeddings
+print("Loading embeddings...")
+embs = np.load(EMBEDS_FILE)  # shape (N, D), dtype float32
+print("Embeddings loaded:", embs.shape)
+# ---- Build (or load) NearestNeighbors index
+# We use metric='cosine' so kneighbors returns cosine *distance* (range 0..2).
+# We convert it to a similarity with sim = 1 - distance; cosine distance is defined as 1 - cosine similarity, so this holds whether or not the embeddings were normalized.
+nn = NearestNeighbors(n_neighbors=DEFAULT_TOPK, metric="cosine", n_jobs=-1)
+nn.fit(embs)
+print("NearestNeighbors ready.")
+# ---- Load model & processor
 processor = AutoProcessor.from_pretrained(MODEL_ID)
 model = AutoModel.from_pretrained(MODEL_ID).to(DEVICE)
 model.eval()
+def retrieve_texts_from_image(image: Image.Image, top_k: int = DEFAULT_TOPK):
+    if image is None:
+        return "No image uploaded", None
+    # Compute image embedding
     inputs = processor(images=image.convert("RGB"), return_tensors="pt").to(DEVICE)
     with torch.no_grad():
         img_embed = model.get_image_features(**inputs)  # (1, D)
         img_embed = img_embed / img_embed.norm(p=2, dim=-1, keepdim=True)
+    img_vec = img_embed.cpu().numpy().astype("float32")  # (1, D)
+    # Query NN
+    distances, indices = nn.kneighbors(img_vec, n_neighbors=top_k)
+    # sklearn returns cosine distances: dist = 1 - cosine_similarity (true for any non-zero vectors, normalized or not)
     results = []
+    for dist, idx in zip(distances[0], indices[0]):
+        sim = 1.0 - float(dist)    # similarity score in approx range [-1..1], typically [0..1]
         text = texts[idx] if idx < len(texts) else ""
+        results.append({"text": text, "score": sim, "id": int(idx)})
+    # format HTML
     lines = []
+    for i, r in enumerate(results, start=1):
         lines.append(f"<b>Rank {i}</b> — score: {r['score']:.4f}<br>{r['text']}")
     html = "<br><br>".join(lines)
     return html, image
+# ---- Gradio UI
 with gr.Blocks() as demo:
+    gr.Markdown("# Image → Retrieved Texts (NO FAISS)")
     with gr.Row():
         with gr.Column(scale=1):
+            img_in = gr.Image(type="pil", label="Upload image")
+            k_slider = gr.Slider(1, 20, value=DEFAULT_TOPK, step=1, label="Top K")
             run_btn = gr.Button("Retrieve")
         with gr.Column(scale=1):
             out_html = gr.HTML()
             out_img = gr.Image(label="Input image (preview)")
+    run_btn.click(retrieve_texts_from_image, inputs=[img_in, k_slider], outputs=[out_html, out_img])
 if __name__ == "__main__":
     demo.launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT", 7860)))