Spaces:
Running
on
L4
Running
on
L4
Upload folder using huggingface_hub
Browse files
- frontend/app.py +5 -5
- main.py +30 -6
frontend/app.py
CHANGED
|
@@ -132,7 +132,7 @@ def SearchBox(with_border=False, query_value="", ranking_value="nn+colpali"):
|
|
| 132 |
def SampleQueries():
|
| 133 |
sample_queries = [
|
| 134 |
"Proportion of female new hires 2021-2023?",
|
| 135 |
-
"Total amount of
|
| 136 |
"What is the percentage distribution of employees with performance-based pay relative to the limit in 2023?",
|
| 137 |
"What is the breakdown of management costs by investment strategy in 2023?",
|
| 138 |
"2023 profit loss portfolio",
|
|
@@ -167,13 +167,13 @@ def Hero():
|
|
| 167 |
return Div(
|
| 168 |
H1(
|
| 169 |
"Vespa.ai + ColPali",
|
| 170 |
-
cls="text-
|
| 171 |
),
|
| 172 |
P(
|
| 173 |
"Efficient Document Retrieval with Vision Language Models",
|
| 174 |
cls="text-lg md:text-2xl text-muted-foreground md:tracking-wide",
|
| 175 |
),
|
| 176 |
-
cls="grid gap-5 text-center",
|
| 177 |
)
|
| 178 |
|
| 179 |
|
|
@@ -183,7 +183,7 @@ def Home():
|
|
| 183 |
Hero(),
|
| 184 |
SearchBox(with_border=True),
|
| 185 |
SampleQueries(),
|
| 186 |
-
cls="grid gap-8
|
| 187 |
),
|
| 188 |
cls="grid w-full h-full max-w-screen-md items-center gap-4 mx-auto",
|
| 189 |
)
|
|
@@ -319,7 +319,7 @@ def SearchResult(results: list, query_id: Optional[str] = None):
|
|
| 319 |
Div(
|
| 320 |
Img(
|
| 321 |
src=blur_image_base64,
|
| 322 |
-
hx_get=f"/full_image?
|
| 323 |
style="filter: blur(5px);",
|
| 324 |
hx_trigger="load",
|
| 325 |
hx_swap="outerHTML",
|
|
|
|
| 132 |
def SampleQueries():
|
| 133 |
sample_queries = [
|
| 134 |
"Proportion of female new hires 2021-2023?",
|
| 135 |
+
"Total amount of fixed salaries paid in 2023?",
|
| 136 |
"What is the percentage distribution of employees with performance-based pay relative to the limit in 2023?",
|
| 137 |
"What is the breakdown of management costs by investment strategy in 2023?",
|
| 138 |
"2023 profit loss portfolio",
|
|
|
|
| 167 |
return Div(
|
| 168 |
H1(
|
| 169 |
"Vespa.ai + ColPali",
|
| 170 |
+
cls="text-4xl md:text-7xl font-bold tracking-wide md:tracking-wider bg-clip-text text-transparent bg-gradient-to-r from-black to-gray-700 dark:from-white dark:to-gray-300 animate-fade-in",
|
| 171 |
),
|
| 172 |
P(
|
| 173 |
"Efficient Document Retrieval with Vision Language Models",
|
| 174 |
cls="text-lg md:text-2xl text-muted-foreground md:tracking-wide",
|
| 175 |
),
|
| 176 |
+
cls="grid gap-5 text-center pt-5",
|
| 177 |
)
|
| 178 |
|
| 179 |
|
|
|
|
| 183 |
Hero(),
|
| 184 |
SearchBox(with_border=True),
|
| 185 |
SampleQueries(),
|
| 186 |
+
cls="grid gap-8 md:-mt-[34vh]", # Negative margin only on medium and larger screens
|
| 187 |
),
|
| 188 |
cls="grid w-full h-full max-w-screen-md items-center gap-4 mx-auto",
|
| 189 |
)
|
|
|
|
| 319 |
Div(
|
| 320 |
Img(
|
| 321 |
src=blur_image_base64,
|
| 322 |
+
hx_get=f"/full_image?docid={fields['id']}&query_id={query_id}&idx={idx}",
|
| 323 |
style="filter: blur(5px);",
|
| 324 |
hx_trigger="load",
|
| 325 |
hx_swap="outerHTML",
|
main.py
CHANGED
|
@@ -256,13 +256,29 @@ async def get_sim_map(query_id: str, idx: int, token: str):
|
|
| 256 |
)
|
| 257 |
|
| 258 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 259 |
@app.get("/full_image")
|
| 260 |
-
async def full_image(id: str):
|
| 261 |
"""
|
| 262 |
Endpoint to get the full quality image for a given result id.
|
| 263 |
"""
|
| 264 |
-
image_data = await get_full_image_from_vespa(vespa_app, id)
|
| 265 |
-
|
|
|
|
| 266 |
# Decode the base64 image data
|
| 267 |
# image_data = base64.b64decode(image_data)
|
| 268 |
image_data = "data:image/jpeg;base64," + image_data
|
|
@@ -276,11 +292,19 @@ async def full_image(id: str):
|
|
| 276 |
|
| 277 |
async def message_generator(query_id: str, query: str):
|
| 278 |
result = None
|
| 279 |
-
|
|
|
|
| 280 |
result = result_cache.get(query_id)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 281 |
await asyncio.sleep(0.5)
|
| 282 |
-
|
| 283 |
-
images = [result["fields"]["blur_image"] for result in search_results]
|
| 284 |
# from b64 to PIL image
|
| 285 |
images = [Image.open(io.BytesIO(base64.b64decode(img))) for img in images]
|
| 286 |
|
|
|
|
| 256 |
)
|
| 257 |
|
| 258 |
|
| 259 |
+
async def update_full_image_cache(docid: str, query_id: str, idx: int, image_data: str):
    """
    Store the full-quality image for one search hit in the cached query result.

    Args:
        docid: Document id of the hit; not read by this function itself.
        query_id: Key under which the query result is held in ``result_cache``.
        idx: Position of the hit in the list returned by ``get_results_children``.
        image_data: Image payload to store under the hit's ``full_image`` field.

    Returns:
        None. The update is skipped when nothing is cached for ``query_id`` or
        when ``idx`` is not a valid position in the list of hits.
    """
    result = result_cache.get(query_id)
    if result is None:
        # NOTE(review): the delay before returning is kept from the original;
        # its purpose is not evident from this function alone.
        await asyncio.sleep(0.5)
        return
    search_results = get_results_children(result)
    # Reject negative positions as well as too-large ones: a negative idx would
    # otherwise index from the end and attach the image to the wrong hit.
    if not 0 <= idx < len(search_results):
        await asyncio.sleep(0.5)
        return
    search_results[idx]["fields"]["full_image"] = image_data
    # Write the mutated result back so readers of the cache see the new field.
    result_cache.set(query_id, result)
|
| 272 |
+
|
| 273 |
+
|
| 274 |
@app.get("/full_image")
|
| 275 |
+
async def full_image(docid: str, query_id: str, idx: int):
|
| 276 |
"""
|
| 277 |
Endpoint to get the full quality image for a given result id.
|
| 278 |
"""
|
| 279 |
+
image_data = await get_full_image_from_vespa(vespa_app, docid)
|
| 280 |
+
# Update the cache with the full image data asynchronously to not block the request
|
| 281 |
+
asyncio.create_task(update_full_image_cache(docid, query_id, idx, image_data))
|
| 282 |
# Decode the base64 image data
|
| 283 |
# image_data = base64.b64decode(image_data)
|
| 284 |
image_data = "data:image/jpeg;base64," + image_data
|
|
|
|
| 292 |
|
| 293 |
async def message_generator(query_id: str, query: str):
|
| 294 |
result = None
|
| 295 |
+
images = []
|
| 296 |
+
while len(images) == 0:
|
| 297 |
result = result_cache.get(query_id)
|
| 298 |
+
if result is None:
|
| 299 |
+
await asyncio.sleep(0.5)
|
| 300 |
+
continue
|
| 301 |
+
search_results = get_results_children(result)
|
| 302 |
+
for single_result in search_results:
|
| 303 |
+
img = single_result["fields"].get("full_image", None)
|
| 304 |
+
if img is not None:
|
| 305 |
+
images.append(img)
|
| 306 |
await asyncio.sleep(0.5)
|
| 307 |
+
|
|
|
|
| 308 |
# from b64 to PIL image
|
| 309 |
images = [Image.open(io.BytesIO(base64.b64decode(img))) for img in images]
|
| 310 |
|