Spaces:
Sleeping
Sleeping
Improve Visual Search with Negative Vectors
Browse files
app.py
CHANGED
|
@@ -12,6 +12,7 @@ from PIL import Image, ImageDraw, ImageFont
|
|
| 12 |
import io
|
| 13 |
import zipfile
|
| 14 |
from huggingface_hub import hf_hub_download
|
|
|
|
| 15 |
|
| 16 |
app = FastAPI()
|
| 17 |
|
|
@@ -134,7 +135,7 @@ async def home(request: Request):
|
|
| 134 |
async def search(request: Request, q: str, searchmode: str = "text"):
|
| 135 |
if not q: return ""
|
| 136 |
|
| 137 |
-
# 1. ANALYTICS
|
| 138 |
try:
|
| 139 |
conn = get_db_connection()
|
| 140 |
c = conn.cursor()
|
|
@@ -151,19 +152,39 @@ async def search(request: Request, q: str, searchmode: str = "text"):
|
|
| 151 |
results = []
|
| 152 |
seen_files = set()
|
| 153 |
|
| 154 |
-
# --- MODE 1: VISUAL SEARCH ---
|
| 155 |
if searchmode == "visual" and visual_tbl:
|
| 156 |
try:
|
| 157 |
-
#
|
| 158 |
-
|
| 159 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 160 |
|
| 161 |
for res in vec_results:
|
| 162 |
results.append({
|
| 163 |
"type": "Visual Match",
|
| 164 |
"filename": res['filename'],
|
| 165 |
"page": res['page'],
|
| 166 |
-
"text": f"
|
| 167 |
"score": 1.0 - res['_distance']
|
| 168 |
})
|
| 169 |
except Exception as e:
|
|
@@ -171,8 +192,8 @@ async def search(request: Request, q: str, searchmode: str = "text"):
|
|
| 171 |
|
| 172 |
return templates.TemplateResponse("partials/results.html", {"request": request, "results": results})
|
| 173 |
|
| 174 |
-
# --- MODE 2: TEXT
|
| 175 |
-
|
| 176 |
# A. SQLite Keyword Search
|
| 177 |
conn = get_db_connection()
|
| 178 |
cursor = conn.cursor()
|
|
|
|
| 12 |
import io
|
| 13 |
import zipfile
|
| 14 |
from huggingface_hub import hf_hub_download
|
| 15 |
+
import numpy as np
|
| 16 |
|
| 17 |
app = FastAPI()
|
| 18 |
|
|
|
|
| 135 |
async def search(request: Request, q: str, searchmode: str = "text"):
|
| 136 |
if not q: return ""
|
| 137 |
|
| 138 |
+
# 1. ANALYTICS (Keep existing code)
|
| 139 |
try:
|
| 140 |
conn = get_db_connection()
|
| 141 |
c = conn.cursor()
|
|
|
|
| 152 |
results = []
|
| 153 |
seen_files = set()
|
| 154 |
|
| 155 |
+
# --- MODE 1: VISUAL SEARCH (UPGRADED LOGIC) ---
|
| 156 |
if searchmode == "visual" and visual_tbl:
|
| 157 |
try:
|
| 158 |
+
# A. ENHANCE THE POSITIVE (Prompt Engineering)
|
| 159 |
+
# CLIP works better when you tell it "This is a photo"
|
| 160 |
+
positive_prompt = f"a photo of {q}, visual evidence"
|
| 161 |
+
pos_vec = visual_model.encode(positive_prompt)
|
| 162 |
+
|
| 163 |
+
# B. DEFINE THE NEGATIVE (The "Noise" to remove)
|
| 164 |
+
# We tell the AI: "Ignore boring text pages"
|
| 165 |
+
negative_prompt = "scanned text document, white paper, handwriting, typed text"
|
| 166 |
+
neg_vec = visual_model.encode(negative_prompt)
|
| 167 |
+
|
| 168 |
+
# C. VECTOR MATH (The Magic)
|
| 169 |
+
# Formula: final_vec = pos_vec - (alpha * neg_vec), with alpha = 0.5
|
| 170 |
+
# This pushes the search vector AWAY from text and TOWARDS the object
|
| 171 |
+
alpha = 0.5
|
| 172 |
+
final_vec = pos_vec - (alpha * neg_vec)
|
| 173 |
+
|
| 174 |
+
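# Normalize to unit length. Cosine similarity ignores the query's scale, so this matters mainly if the table uses an L2 or dot-product metric (TODO: confirm the table's metric)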
|
| 175 |
+
norm = np.linalg.norm(final_vec)
|
| 176 |
+
if norm > 0:
|
| 177 |
+
final_vec = final_vec / norm
|
| 178 |
+
|
| 179 |
+
# Search with the new "Clean" vector
|
| 180 |
+
vec_results = visual_tbl.search(final_vec).limit(20).to_list()
|
| 181 |
|
| 182 |
for res in vec_results:
|
| 183 |
results.append({
|
| 184 |
"type": "Visual Match",
|
| 185 |
"filename": res['filename'],
|
| 186 |
"page": res['page'],
|
| 187 |
+
"text": f"Visual match for '{q}'",
|
| 188 |
"score": 1.0 - res['_distance']
|
| 189 |
})
|
| 190 |
except Exception as e:
|
|
|
|
| 192 |
|
| 193 |
return templates.TemplateResponse("partials/results.html", {"request": request, "results": results})
|
| 194 |
|
| 195 |
+
# --- MODE 2: TEXT SEARCH (Keep existing code exactly as is) ---
|
| 196 |
+
# (The existing text search logic continues below, unchanged.)
|
| 197 |
# A. SQLite Keyword Search
|
| 198 |
conn = get_db_connection()
|
| 199 |
cursor = conn.cursor()
|