Update src/load_file.py

src/load_file.py CHANGED (+311 -285)

@@ -1,368 +1,394 @@
--- src/load_file.py (before)

import streamlit as st
import pymongo
from datetime import datetime, date, timezone
-import
-from typing import List, Tuple, Optional
import requests
-from PIL import Image
import io
from dotenv import load_dotenv
import concurrent.futures
import threading
from functools import lru_cache
-

load_dotenv()


class ImageGalleryApp:
    def __init__(self, mongo_uri: str, db_name: str, collection_name: str):
-        """Initialize
        self.client = pymongo.MongoClient(mongo_uri)
        self.db = self.client[db_name]
        self.collection = self.db[collection_name]
-        self._categories_cache = None
-        self._filenames_cache = None
        self._cache_lock = threading.Lock()

    @lru_cache(maxsize=128)
    def get_unique_categories(self) -> List[str]:
-        """
        try:
-
-
-            {"
-
-
-            ]
-            categories = [doc["_id"] for doc in self.collection.aggregate(pipeline) if doc["_id"]]
-            return ["All"] + categories
        except Exception as e:
            print(f"Error fetching categories: {e}")
            return ["All"]

    @lru_cache(maxsize=128)
    def get_unique_filenames(self) -> List[str]:
-        """
        try:
-
-
-            {"
-
-
-            ]
-            filenames = [doc["_id"] for doc in self.collection.aggregate(pipeline) if doc["_id"]]
-            return ["All"] + filenames
        except Exception as e:
            print(f"Error fetching filenames: {e}")
            return ["All"]

-    def load_image_from_url(self, url: str) -> Optional[Image.Image]:
-        """Load image from URL with better error handling"""
-        try:
-            full_url = url
-
-            response = requests.get(full_url, timeout=5, stream=True)  # Reduced timeout, added streaming
-            response.raise_for_status()
-
-            # Limit image size to prevent memory issues
-            image = Image.open(io.BytesIO(response.content))
-
-            # Resize large images to improve performance
-            max_size = (800, 800)
-            if image.size[0] > max_size[0] or image.size[1] > max_size[1]:
-                image.thumbnail(max_size, Image.Resampling.LANCZOS)
-
-            return image
-        except Exception as e:
-            print(f"Error loading image from {url}: {e}")
-            return None
-
-    def load_images_parallel(self, urls: List[str], max_workers: int = 5) -> List[Image.Image]:
-        """Load multiple images in parallel"""
-        images = []
-        with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
-            future_to_url = {executor.submit(self.load_image_from_url, url): url for url in
-                             urls[:]}  # Limit to first 10 URLs
-            for future in concurrent.futures.as_completed(future_to_url):
-                try:
-                    image = future.result()
-                    if image:
-                        images.append(image)
-                except Exception as e:
-                    print(f"Error in parallel loading: {e}")
-        return images
-
    def parse_date_input(self, date_input) -> Optional[date]:
-        """Convert string date input to date object"""
        if not date_input or date_input == "":
            return None
-
        if isinstance(date_input, date):
            return date_input
-
        if isinstance(date_input, str):
            try:
-                # Handle YYYY-MM-DD format (most common from Calendar component)
                if date_input.count('-') == 2:
                    return datetime.strptime(date_input, '%Y-%m-%d').date()
-                # Handle MM/DD/YYYY format
                elif date_input.count('/') == 2:
                    return datetime.strptime(date_input, '%m/%d/%Y').date()
            except ValueError as e:
                print(f"Error parsing date string '{date_input}': {e}")
                return None
-
        return None

-    def
-
-
-
-
-
-
-
-        # Convert string inputs to date objects
-        start_date_obj = self.parse_date_input(start_date)
-        end_date_obj = self.parse_date_input(end_date)

-
-
            "lob": lob,
-            "status": "completed",
-            "urls": {"$exists": True, "$ne": []}
        }
-
        if category != "All":
-
-
        if file_name != "All":
-

-
-
        date_query = {}
        if start_date_obj:
-
-                # Convert date to datetime at start of day
-                start_dt = datetime.combine(start_date_obj, datetime.min.time())
-                date_query["$gte"] = start_dt
-            except Exception as e:
-                print(f"Error parsing start date: {e}")
-
        if end_date_obj:
-
-                # Convert date to datetime at end of day
-                end_dt = datetime.combine(end_date_obj, datetime.max.time())
-                date_query["$lte"] = end_dt
-            except Exception as e:
-                print(f"Error parsing end date: {e}")
        if date_query:
-

        try:
-
-
            "category": 1,
            "file_name": 1,
            "created_at": 1,
-            "
-            "status": 1
-
-
-
-
-            cursor = self.collection.find(query, projection).sort("created_at", -1)
-            documents = list(cursor)
-
-            if not documents:
-                return [], "No records found matching the criteria."
-
-            # Collect all URLs for parallel loading
-            all_urls = []
-            url_to_doc = {}
-
-            for doc in documents:
-                urls = doc.get("urls", [])
-                if urls:
-
-                    for url in urls:
-                        first_url = url
-                        all_urls.append(first_url)
-                        url_to_doc[first_url] = doc
-
-            # Load images in parallel
-            print(f"Loading {len(all_urls)} images...")
-            images = self.load_images_parallel(all_urls, max_workers=8)
-
-            # Build info text
-            info_text = f"Found {len(documents)} records (showing {len(images)} images)\n"
-            info_text += f"Filter: Status = completed, LOB = {lob}\n"
-            if start_date_obj:
-                info_text += f"Start Date: {start_date_obj}\n"
-            if end_date_obj:
-                info_text += f"End Date: {end_date_obj}\n"
-            info_text += "\n"
-
-            for i, doc in enumerate(documents[:len(images)]):
-                info_text += f"#{i + 1}\n"
-                info_text += f"Category: {doc.get('category', 'N/A')}\n"
-                info_text += f"File: {doc.get('file_name', 'N/A')}\n"
-                info_text += f"Prompt: {doc.get('prompt', 'N/A')}\n"
-                info_text += f"Created: {doc.get('created_at', 'N/A')}\n"
-                info_text += f"URLs: {len(doc.get('urls', []))} image(s)\n"
-                info_text += f"Status: {doc.get('status', 'N/A')}\n"
-                info_text += "-" * 30 + "\n"
-
-            return images, info_text
-
        except Exception as e:
-
-
-
-

def create_streamlit_app(mongo_uri: str, db_name: str, collection_name: str):
    app = ImageGalleryApp(mongo_uri, db_name, collection_name)
    def get_filter_choices():
        try:
            categories = app.get_unique_categories()
            filenames = app.get_unique_filenames()
            return categories, filenames
-        except Exception
            return ["All"], ["All"]
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
    st.markdown("""
    <style>
-
-
-
-
-    }
-    .tab {
-        margin-right: 32px;
-        padding-bottom: 6px;
-        color: #02050d;
-        cursor: pointer;
-    }
-    .tab.selected {
-        color: #FF7300;
-        border-bottom: 3px solid #FF7300;
-        font-weight: bold;
-    }
-    /* Button Styles */
-    .orange-btn > button {
-        background-color: #FF7300 !important;
-        color: white !important;
-        font-weight: bold !important;
-        width: 100% !important;
-        border-radius: 8px !important;
-        font-size: 20px !important;
-        height: 48px !important;
-        margin-top: 8px !important;
-    }
-    .grey-btn > button {
-        background-color: #ECECEC !important;
-        color: #010203 !important;
-        font-weight: 500 !important;
-        border-radius: 8px !important;
-        font-size: 18px !important;
-        height: 48px !important;
-    }
-    /* Card style */
-    .filter-card {
-        background: #fff;
        border-radius: 10px;
-
-
-
-
    """, unsafe_allow_html=True)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    with
-
-
-
-
-
-
-
    with col_btn1:
-
-        search_clicked = st.button("🔍 Search Images", key="search", use_container_width=True)
    with col_btn2:
-
-
-
-
-
-
-        st.
-
-
-
-
-
-
-
-
-
-    st.text_area(
-        "Document Information",
-        value=st.session_state.get("info", ""),
-        height=450,
-        disabled=True,
-        key="info_text"
-    )
-    # --- Event handlers ---
    if search_clicked:
-
-
        )
-        st.session_state["
-
-        st.session_state["
-
-        st.
-
-
-
-
-
-
-
+++ src/load_file.py (after)

import streamlit as st
import pymongo
from datetime import datetime, date, timezone
+from typing import List, Tuple, Optional, Dict
import requests
+from PIL import Image, ImageFile
import io
from dotenv import load_dotenv
import concurrent.futures
import threading
from functools import lru_cache
+import hashlib
+import contextlib
+from requests.adapters import HTTPAdapter
+from urllib3.util.retry import Retry

load_dotenv()
+ImageFile.LOAD_TRUNCATED_IMAGES = True

+@st.cache_data(show_spinner=False, ttl=60 * 30)
+def _download_bytes_cached(url: str, timeout_s: float = 12.0) -> Optional[bytes]:
+    """Download image bytes from URL with Streamlit cache (30 min TTL)."""
+    try:
+        r = requests.get(url, timeout=(3.05, timeout_s), stream=True)
+        r.raise_for_status()
+        return r.content
+    except Exception:
+        return None

class ImageGalleryApp:
    def __init__(self, mongo_uri: str, db_name: str, collection_name: str):
+        """Initialize MongoDB and HTTP session with retry pooling."""
        self.client = pymongo.MongoClient(mongo_uri)
        self.db = self.client[db_name]
        self.collection = self.db[collection_name]
        self._cache_lock = threading.Lock()

+        self.session = requests.Session()
+        retries = Retry(
+            total=3,
+            connect=3,
+            read=3,
+            backoff_factor=0.4,
+            status_forcelist=[429, 500, 502, 503, 504],
+            allowed_methods=["GET", "HEAD"]
+        )
+        adapter = HTTPAdapter(pool_connections=64, pool_maxsize=64, max_retries=retries)
+        self.session.mount("http://", adapter)
+        self.session.mount("https://", adapter)
+
+        self.thumb_max_size = (768, 768)
+
    @lru_cache(maxsize=128)
    def get_unique_categories(self) -> List[str]:
+        """Return all unique categories from DB."""
        try:
+            categories = self.collection.distinct(
+                "category",
+                {"status": "completed", "lob": "leadgen_vivek", "category": {"$ne": None}}
+            )
+            return ["All"] + sorted(categories)
        except Exception as e:
            print(f"Error fetching categories: {e}")
            return ["All"]

    @lru_cache(maxsize=128)
    def get_unique_filenames(self) -> List[str]:
+        """Return all unique file names from DB."""
        try:
+            filenames = self.collection.distinct(
+                "file_name",
+                {"status": "completed", "lob": "leadgen_vivek", "file_name": {"$ne": None}}
+            )
+            return ["All"] + sorted(filenames)
        except Exception as e:
            print(f"Error fetching filenames: {e}")
            return ["All"]

    def parse_date_input(self, date_input) -> Optional[date]:
+        """Convert string or date input to date object."""
        if not date_input or date_input == "":
            return None
        if isinstance(date_input, date):
            return date_input
        if isinstance(date_input, str):
            try:
                if date_input.count('-') == 2:
                    return datetime.strptime(date_input, '%Y-%m-%d').date()
                elif date_input.count('/') == 2:
                    return datetime.strptime(date_input, '%m/%d/%Y').date()
            except ValueError as e:
                print(f"Error parsing date string '{date_input}': {e}")
                return None
        return None

+    def load_image_from_url(self, url: str) -> Optional[Image.Image]:
+        """Load and thumbnail image from URL with caching and pooling."""
+        try:
+            data = _download_bytes_cached(url)
+            if data is None:
+                r = self.session.get(url, timeout=(3.05, 12), stream=True)
+                r.raise_for_status()
+                data = r.content

+            img = Image.open(io.BytesIO(data))
+            with contextlib.suppress(Exception):
+                img = img.convert("RGB")
+
+            if img.size[0] > self.thumb_max_size[0] or img.size[1] > self.thumb_max_size[1]:
+                img.thumbnail(self.thumb_max_size, Image.Resampling.LANCZOS)
+
+            return img
+        except Exception as e:
+            print(f"Error loading image from {url}: {e}")
+            return None
+
+    def load_images_parallel(self, urls: List[str], max_workers: int = 8) -> List[Tuple[str, Optional[Image.Image]]]:
+        """Load multiple images in parallel; returns list of (url, image)."""
+        results: List[Tuple[str, Optional[Image.Image]]] = []
+        if not urls:
+            return results
+        with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as ex:
+            future_map = {ex.submit(self.load_image_from_url, u): u for u in urls}
+            for fut in concurrent.futures.as_completed(future_map):
+                u = future_map[fut]
+                try:
+                    img = fut.result()
+                except Exception as e:
+                    print(f"Error in parallel loading: {e}")
+                    img = None
+                results.append((u, img))
+        return results
+
+    def search_images_page(
+        self,
+        category: str = "All",
+        file_name: str = "All",
+        start_date: Optional[date] = None,
+        end_date: Optional[date] = None,
+        lob: str = "leadgen_vivek",
+        page: int = 0,
+        page_size: int = 24,
+    ) -> Tuple[List[Dict], int]:
+        """
+        Paginated search for images by filters.
+        Returns:
+        - docs: list of {url, category, file_name, created_at, prompt, status}
+        - total: total number of image URLs matching the filters
+        """
+        match = {
            "lob": lob,
+            "status": "completed",
+            "urls": {"$exists": True, "$ne": []}
        }
        if category != "All":
+            match["category"] = category
        if file_name != "All":
+            match["file_name"] = file_name

+        start_date_obj = self.parse_date_input(start_date)
+        end_date_obj = self.parse_date_input(end_date)
+        if start_date_obj or end_date_obj:
            date_query = {}
            if start_date_obj:
+                date_query["$gte"] = datetime.combine(start_date_obj, datetime.min.time())
            if end_date_obj:
+                date_query["$lte"] = datetime.combine(end_date_obj, datetime.max.time())
            if date_query:
+                match["created_at"] = date_query

        try:
+            count_pipeline = [
+                {"$match": match},
+                {"$unwind": "$urls"},
+                {"$count": "n"}
+            ]
+            count_doc = list(self.collection.aggregate(count_pipeline))
+            total = count_doc[0]["n"] if count_doc else 0
+        except Exception as e:
+            print(f"Count error: {e}")
+            total = 0
+
+        if total == 0:
+            return [], 0
+
+        pipeline = [
+            {"$match": match},
+            {"$unwind": "$urls"},
+            {"$sort": {"created_at": -1}},
+            {"$skip": max(0, page) * max(1, page_size)},
+            {"$limit": max(1, page_size)},
+            {"$project": {
+                "_id": 0,
+                "url": "$urls",
                "category": 1,
                "file_name": 1,
                "created_at": 1,
+                "prompt": 1,
+                "status": 1
+            }}
+        ]
+        try:
+            docs = list(self.collection.aggregate(pipeline, allowDiskUse=True))
        except Exception as e:
+            print(f"Aggregation error: {e}")
+            docs = []
+        return docs, total

def create_streamlit_app(mongo_uri: str, db_name: str, collection_name: str):
+    """Main Streamlit UI for image gallery."""
    app = ImageGalleryApp(mongo_uri, db_name, collection_name)
+
    def get_filter_choices():
+        """Fetch filter choices for category and filename."""
        try:
            categories = app.get_unique_categories()
            filenames = app.get_unique_filenames()
            return categories, filenames
+        except Exception:
            return ["All"], ["All"]
+
+    # Session state defaults
+    if "categories_list" not in st.session_state:
+        st.session_state["categories_list"], st.session_state["filenames_list"] = get_filter_choices()
+
+    st.session_state.setdefault("selected_category", "All")
+    st.session_state.setdefault("selected_filename", "All")
+    st.session_state.setdefault("selected_lob", "leadgen_vivek")
+
+    today = datetime.now(timezone.utc).date()
+    st.session_state.setdefault("use_date_filter", True)
+    st.session_state.setdefault("selected_start_date", today)
+    st.session_state.setdefault("selected_end_date", today)
+
+    st.session_state.setdefault("page", 0)
+    st.session_state.setdefault("page_size", 24)
+    st.session_state.setdefault("last_query_total", 0)
+    st.session_state.setdefault("did_search", False)
+
+    # Custom styles for UI
    st.markdown("""
    <style>
+    .orange-btn > button { background:#FF7300 !important; color:#fff !important; font-weight:600 !important; }
+    .skel {
+        width: 100%;
+        aspect-ratio: 1 / 1;
        border-radius: 10px;
+        background: linear-gradient(90deg, #eee 25%, #f5f5f5 37%, #eee 63%);
+        background-size: 400% 100%;
+        animation: shimmer 1.4s ease infinite;
+    }
+    @keyframes shimmer {
+        0% { background-position: 0% 0; }
+        100% { background-position: -135% 0; }
+    }
+    </style>
    """, unsafe_allow_html=True)
+
+    # Filter controls
+    col1, col2, col3 = st.columns([1,1,1])
+    with col1:
+        category = st.selectbox(
+            "Category",
+            options=st.session_state["categories_list"],
+            index=st.session_state["categories_list"].index(st.session_state["selected_category"])
+            if st.session_state["selected_category"] in st.session_state["categories_list"] else 0
+        )
+    with col2:
+        filename = st.selectbox(
+            "File Name",
+            options=st.session_state["filenames_list"],
+            index=st.session_state["filenames_list"].index(st.session_state["selected_filename"])
+            if st.session_state["selected_filename"] in st.session_state["filenames_list"] else 0
+        )
+    with col3:
+        lob = st.text_input("LOB (Line of Business)", value=st.session_state["selected_lob"])
+
+    coldf = st.columns([1,1,1])
+    with coldf[0]:
+        use_date_filter = st.checkbox("Filter by date", value=st.session_state["use_date_filter"])
+    with coldf[1]:
+        start_date = st.date_input(
+            "Start Date",
+            value=st.session_state["selected_start_date"],
+            disabled=not use_date_filter
+        )
+    with coldf[2]:
+        end_date = st.date_input(
+            "End Date",
+            value=st.session_state["selected_end_date"],
+            disabled=not use_date_filter
+        )
+
+    col_misc = st.columns([1,1,1,2])
+    with col_misc[0]:
+        page_size = st.selectbox("Images per page", [8, 12, 16, 24, 32, 48],
+                                 index=[8, 12, 16, 24, 32, 48].index(st.session_state["page_size"]))
+
+    col_btn1, col_btn2, col_btn3 = st.columns([2,2,2])
    with col_btn1:
+        search_clicked = st.button("🔍 Search", use_container_width=True)
    with col_btn2:
+        refresh_clicked = st.button("🔄 Refresh Filters", use_container_width=True)
+    with col_btn3:
+        reset_clicked = st.button("♻️ Reset Page", use_container_width=True)
+
+    # Button events
+    if refresh_clicked:
+        st.session_state["categories_list"], st.session_state["filenames_list"] = get_filter_choices()
+        st.session_state["selected_category"] = "All"
+        st.session_state["selected_filename"] = "All"
+        st.session_state["page"] = 0
+        st.rerun()
+
+    if reset_clicked:
+        st.session_state["page"] = 0
+        st.rerun()
+
    if search_clicked:
+        st.session_state["selected_category"] = category
+        st.session_state["selected_filename"] = filename
+        st.session_state["selected_lob"] = lob
+        st.session_state["use_date_filter"] = use_date_filter
+        st.session_state["selected_start_date"] = start_date
+        st.session_state["selected_end_date"] = end_date
+        st.session_state["page_size"] = page_size
+        st.session_state["page"] = 0
+        st.session_state["did_search"] = True
+        st.rerun()
+
+    # Results display
+    if st.session_state["did_search"]:
+        _start = st.session_state["selected_start_date"] if st.session_state["use_date_filter"] else None
+        _end = st.session_state["selected_end_date"] if st.session_state["use_date_filter"] else None
+
+        docs, total = app.search_images_page(
+            category=st.session_state["selected_category"],
+            file_name=st.session_state["selected_filename"],
+            start_date=_start,
+            end_date=_end,
+            lob=st.session_state["selected_lob"],
+            page=st.session_state["page"],
+            page_size=st.session_state["page_size"]
        )
+        st.session_state["last_query_total"] = total
+
+        total_pages = max(1, (total + st.session_state["page_size"] - 1) // st.session_state["page_size"])
+
+        nav1, nav2, nav3 = st.columns([1,2,1])
+        with nav1:
+            if st.button("⬅️ Prev", disabled=(st.session_state["page"] <= 0)):
+                st.session_state["page"] -= 1
+                st.rerun()
+        with nav2:
+            st.markdown(
+                f"<div style='text-align:center'>Page <b>{st.session_state['page']+1}</b> of <b>{total_pages}</b>"
+                f" · <b>{total}</b> images total</div>",
+                unsafe_allow_html=True
+            )
+        with nav3:
+            if st.button("Next ➡️", disabled=(st.session_state["page"] >= total_pages - 1)):
+                st.session_state["page"] += 1
+                st.rerun()
+
+        st.divider()
+
+        if total == 0 or not docs:
+            st.info("No images found for the current filters.")
+            return
+
+        st.markdown("#### Images")
+        cols = st.columns(4)
+
+        # Show skeletons while loading images
+        placeholders = []
+        for i, _ in enumerate(docs):
+            ph = cols[i % 4].empty()
+            ph.markdown("<div class='skel'></div>", unsafe_allow_html=True)
+            placeholders.append(ph)
+
+        urls = [d["url"] for d in docs]
+        loaded = app.load_images_parallel(urls, max_workers=8)
+        url_to_img = {u: img for (u, img) in loaded}
+
+        # Fill placeholders with loaded images
+        for i, d in enumerate(docs):
+            img = url_to_img.get(d["url"])
+            meta = f"{d.get('category','N/A')} | {d.get('file_name','N/A')} | {d.get('created_at','')}"
+            if img:
+                placeholders[i].image(img, use_container_width=True, caption=meta)
+            else:
+                placeholders[i].warning("Failed to load image")
+    else:
+        st.info("Set your filters and click **Search** to load images.")