habulaj commited on
Commit
e4d6d42
·
verified ·
1 Parent(s): 5660981

Update routers/search.py

Browse files
Files changed (1) hide show
  1. routers/search.py +339 -345
routers/search.py CHANGED
@@ -12,415 +12,409 @@ import struct
12
  from typing import Optional, Tuple, List, Dict
13
  import base64
14
  from functools import lru_cache
 
 
15
  import time
16
- from concurrent.futures import ProcessPoolExecutor
17
- import multiprocessing
18
-
19
- # Use uvloop se disponível para melhor performance async
20
- try:
21
- import uvloop
22
- uvloop.install()
23
- except ImportError:
24
- pass
25
-
26
- def _init_worker():
27
- """Inicializa worker do processo filho"""
28
- # Configurações específicas do worker se necessário
29
- import signal
30
- signal.signal(signal.SIGINT, signal.SIG_IGN)
31
 
32
  router = APIRouter()
33
 
34
- # Pool de processos para thumbnails (mais eficiente que threads para CPU-bound)
35
- _process_pool = ProcessPoolExecutor(
36
- max_workers=min(multiprocessing.cpu_count(), 8),
37
- initializer=_init_worker
38
  )
39
 
40
- # Cache otimizado com TTL implícito
41
  _url_cache = {}
42
- _cache_timestamps = {}
43
- _cache_max_size = 2000
44
- _cache_ttl = 3600 # 1 hora
45
 
46
- @lru_cache(maxsize=1000)
47
- def _clean_wikimedia_url(url: str) -> str:
48
- """Cache LRU para limpeza de URLs Wikimedia"""
49
- if 'wikimedia.org' not in url or '/thumb/' not in url:
50
- return url
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
 
52
  try:
53
- idx = url.find('/thumb/')
54
- if idx == -1:
55
- return url
56
 
57
- before_thumb = url[:idx]
58
- after_thumb = url[idx + 6:]
59
- slash_count = 0
60
- end_idx = 0
61
 
62
- for i, char in enumerate(after_thumb):
63
- if char == '/':
64
- slash_count += 1
65
- if slash_count == 3:
66
- end_idx = i
67
- break
68
 
69
- if slash_count >= 3:
70
- original_path = after_thumb[:end_idx]
71
- return f"{before_thumb}/{original_path}"
72
- except:
73
- pass
74
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
  return url
76
 
77
- # Regex pré-compilado para máxima performance
78
- _IMAGE_PATTERN = re.compile(
79
- r'https?://[^\s"\'<>]+?\.(?:jpg|png|webp|jpeg)(?:\?[^\s"\'<>]*)?',
80
- re.IGNORECASE | re.MULTILINE
81
- )
82
 
83
- def _extract_images_vectorized(response_text: str) -> List[str]:
84
- """Extração vetorizada ultra-rápida"""
85
- # Encontra todas as URLs
86
- raw_urls = _IMAGE_PATTERN.findall(response_text)
 
 
87
 
88
- # Deduplicação com set comprehension
89
- seen = set()
90
- unique_urls = []
91
 
92
- for url in raw_urls[:100]: # Limita para 100 URLs máximo
93
- cleaned = _clean_wikimedia_url(url)
94
- if cleaned not in seen and len(cleaned) > 20: # URLs válidas são maiores
95
- seen.add(cleaned)
96
- unique_urls.append(cleaned)
97
-
98
- if len(unique_urls) >= 80: # Para cedo se já temos suficientes
99
- break
100
 
101
- return unique_urls
 
 
 
 
 
 
 
102
 
103
- # Estruturas de dados otimizadas para parsing
104
- _JPEG_MARKERS = frozenset([b'\xff\xc0', b'\xff\xc1', b'\xff\xc2', b'\xff\xc3'])
105
- _PNG_SIGNATURE = b'\x89PNG\r\n\x1a\n'
106
- _WEBP_SIGNATURE = b'RIFF'
107
 
108
- def _get_image_dimensions_lightning(data: bytes) -> Optional[Tuple[int, int]]:
109
- """Parser de dimensões ultra-otimizado"""
 
 
110
  if len(data) < 24:
111
  return None
112
 
113
- # JPEG - busca otimizada
114
- if data[0:2] == b'\xff\xd8':
115
- # Busca vetorizada por markers
116
- for i in range(2, min(len(data) - 8, 800)):
117
- if data[i:i+2] in _JPEG_MARKERS:
118
- try:
119
- h = (data[i+5] << 8) | data[i+6]
120
- w = (data[i+7] << 8) | data[i+8]
121
- if 50 < w < 50000 and 50 < h < 50000: # Sanity check
122
- return w, h
123
- except IndexError:
124
- break
125
-
126
- # PNG - acesso direto otimizado
127
- elif data[:8] == _PNG_SIGNATURE:
128
- try:
129
- w = struct.unpack('>I', data[16:20])[0]
130
- h = struct.unpack('>I', data[20:24])[0]
131
- if 50 < w < 50000 and 50 < h < 50000:
132
- return w, h
133
- except (struct.error, IndexError):
134
- pass
135
-
136
- # WebP - parsing simplificado
137
- elif data[:4] == _WEBP_SIGNATURE and len(data) >= 30:
138
- try:
139
  if data[12:16] == b'VP8 ':
140
- w = struct.unpack('<H', data[26:28])[0] & 0x3fff
141
- h = struct.unpack('<H', data[28:30])[0] & 0x3fff
142
- if 50 < w < 50000 and 50 < h < 50000:
143
- return w, h
144
- except (struct.error, IndexError):
145
- pass
146
 
147
  return None
148
 
149
- def _create_thumbnail_process(image_data: bytes, max_size: int = 150) -> Optional[str]:
150
- """Função para processamento em processo separado"""
 
 
 
151
  if not image_data or len(image_data) < 100:
152
  return None
153
 
154
  try:
155
- # PIL otimizado
156
- image = Image.open(io.BytesIO(image_data))
157
-
158
- # Conversão ultra-rápida
159
- if image.mode not in ('RGB', 'L'):
160
- if image.mode in ('RGBA', 'LA', 'P'):
161
- # Background branco mais eficiente
162
- bg = Image.new('RGB', image.size, (255, 255, 255))
163
- if image.mode == 'P':
164
- image = image.convert('RGBA')
165
- bg.paste(image, mask=image.split()[-1] if 'A' in image.mode else None)
166
- image = bg
167
- else:
168
- image = image.convert('RGB')
169
-
170
- # Cálculo de proporções com operações inteiras
171
- w, h = image.size
172
- if w <= max_size and h <= max_size:
173
- thumbnail = image # Não redimensiona se já é pequena
174
- else:
175
- # Proporções com divisão inteira otimizada
176
  if w > h:
177
- new_h = (h * max_size) // w
178
- new_w = max_size if new_h > 0 else max_size
179
- new_h = max(1, new_h)
180
  else:
181
- new_w = (w * max_size) // h
182
- new_h = max_size if new_w > 0 else max_size
183
- new_w = max(1, new_w)
184
 
185
- # Resize com filtro mais rápido
186
- thumbnail = image.resize((new_w, new_h), Image.Resampling.NEAREST)
187
-
188
- # Compressão otimizada
189
- buffer = io.BytesIO()
190
- thumbnail.save(buffer,
191
- format='JPEG',
192
- quality=75,
193
- optimize=False,
194
- progressive=False)
195
-
196
- return base64.b64encode(buffer.getvalue()).decode('ascii')
197
-
198
- except Exception:
199
  return None
200
 
201
- def _is_cache_valid(key: str) -> bool:
202
- """Verifica se cache ainda é válido"""
203
- if key not in _cache_timestamps:
204
- return False
205
- return (time.time() - _cache_timestamps[key]) < _cache_ttl
206
 
207
- def _clean_cache():
208
- """Limpeza inteligente do cache"""
209
- if len(_url_cache) <= _cache_max_size:
210
- return
 
 
 
 
211
 
212
- current_time = time.time()
213
- expired_keys = [
214
- key for key, timestamp in _cache_timestamps.items()
215
- if (current_time - timestamp) > _cache_ttl
216
- ]
217
 
218
- for key in expired_keys:
219
- _url_cache.pop(key, None)
220
- _cache_timestamps.pop(key, None)
221
-
222
- # Cliente HTTP reutilizável com configurações otimizadas
223
- _http_client = None
224
-
225
- async def _get_http_client():
226
- """Cliente HTTP singleton otimizado"""
227
- global _http_client
228
- if _http_client is None:
229
- _http_client = httpx.AsyncClient(
230
- timeout=httpx.Timeout(8.0, connect=3.0),
231
- limits=httpx.Limits(
232
- max_keepalive_connections=50,
233
- max_connections=80,
234
- keepalive_expiry=30.0
235
- ),
236
- http2=False,
237
- headers={
238
- 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
239
- 'Accept': 'image/*,*/*;q=0.8',
240
- 'Accept-Encoding': 'gzip, deflate',
241
- 'Connection': 'keep-alive'
242
- }
243
- )
244
- return _http_client
245
-
246
- async def _process_image_ultra_fast(url: str, include_thumbnail: bool, semaphore: asyncio.Semaphore) -> Dict:
247
- """Processamento ultra-otimizado de uma imagem"""
248
- async with semaphore:
249
- # Cache check
250
- cache_key = f"{url}_{include_thumbnail}"
251
- if _is_cache_valid(cache_key):
252
- return _url_cache[cache_key].copy()
253
-
254
- client = await _get_http_client()
255
- clean_url = url.replace('\\u003d', '=').replace('\\u0026', '&').replace('\\\\', '').replace('\\/', '/')
256
-
257
- width, height, thumbnail_b64 = None, None, None
258
 
259
- try:
260
- # Estratégia de range otimizada
261
- range_size = 16384 if include_thumbnail else 4096
262
- headers = {'Range': f'bytes=0-{range_size}'}
263
-
264
- response = await client.get(clean_url, headers=headers, timeout=5.0)
265
-
266
- if response.status_code in (200, 206) and len(response.content) > 200:
267
- data = response.content
268
-
269
- # Parsing de dimensões
270
- dimensions = _get_image_dimensions_lightning(data)
271
- if dimensions:
272
- width, height = dimensions
273
-
274
- # Thumbnail em processo separado se necessário
275
- if include_thumbnail and len(data) > 1000:
276
- loop = asyncio.get_event_loop()
277
- thumbnail_b64 = await loop.run_in_executor(
278
- _process_pool,
279
- _create_thumbnail_process,
280
- data
281
- )
282
 
283
- # Se não conseguiu dimensões, tenta download maior
284
- if not width and response.status_code == 206:
285
- try:
286
- response = await client.get(clean_url, timeout=6.0)
287
- if response.status_code == 200 and len(response.content) < 1500000:
288
- dimensions = _get_image_dimensions_lightning(response.content[:50000])
 
289
  if dimensions:
290
  width, height = dimensions
 
 
 
 
 
 
 
 
 
 
 
 
 
291
 
292
- if include_thumbnail and not thumbnail_b64:
293
- loop = asyncio.get_event_loop()
294
- thumbnail_b64 = await loop.run_in_executor(
295
- _process_pool,
296
- _create_thumbnail_process,
297
- response.content
298
- )
299
- except:
300
- pass
301
 
302
- except Exception:
303
- pass
304
-
305
- # Resultado
306
- result = {"url": clean_url, "width": width, "height": height}
307
- if include_thumbnail:
308
- result["thumbnail"] = f"data:image/jpeg;base64,{thumbnail_b64}" if thumbnail_b64 else None
309
-
310
- # Cache result
311
- if width and height:
312
- _clean_cache()
313
- if len(_url_cache) < _cache_max_size:
314
- _url_cache[cache_key] = result.copy()
315
- _cache_timestamps[cache_key] = time.time()
316
-
317
- return result
318
-
319
- @router.get("/search")
320
- async def search(
321
- q: str = Query(..., description="Termo de pesquisa para imagens"),
322
- min_width: int = Query(1200, description="Largura mínima das imagens"),
323
- include_thumbnails: bool = Query(True, description="Incluir miniaturas base64")
324
- ):
325
- """Busca imagens com performance máxima"""
326
 
327
- # Google Images URL otimizada
328
- params = {
329
- "tbm": "isch",
330
- "q": q,
331
- "tbs": "isz:l,iar:w", # Imagens grandes, formato wide
332
- "safe": "off"
333
- }
334
 
335
- headers = {
336
- "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
337
- "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
338
- "Accept-Language": "en-US,en;q=0.5",
339
- "Accept-Encoding": "gzip, deflate",
340
- "Referer": "https://www.google.com/"
341
  }
342
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
343
  try:
344
- # Busca no Google
345
- client = await _get_http_client()
346
- response = await client.get(
347
- "https://www.google.com/search",
348
- params=params,
349
- headers=headers
350
- )
351
-
352
- if response.status_code != 200:
353
- raise HTTPException(status_code=response.status_code, detail="Erro na busca")
354
-
355
- # Extração ultra-rápida
356
- image_urls = _extract_images_vectorized(response.text)
357
-
358
- if not image_urls:
359
- return JSONResponse(content={
360
- "query": q,
361
- "total_found": 0,
362
- "images": []
363
- })
364
 
365
- # Processamento paralelo com semáforo otimizado
366
- semaphore = asyncio.Semaphore(25) # Concorrência controlada
367
-
368
- # Cria tasks limitadas a 50 imagens máximo
369
- tasks = [
370
- _process_image_ultra_fast(url, include_thumbnails, semaphore)
371
- for url in image_urls[:60] # Processa um pouco mais para compensar falhas
372
- ]
373
-
374
- # Processa tudo em paralelo
375
  results = await asyncio.gather(*tasks, return_exceptions=True)
376
 
377
- # Filtra resultados válidos
378
- valid_images = [
379
- result for result in results
380
- if (not isinstance(result, Exception) and
381
- result.get('width', 0) >= min_width and
382
- result.get('height', 0) > 0)
383
- ]
384
 
385
- # Ordena por área (width * height) e limita a 50
386
- valid_images.sort(key=lambda x: x.get('width', 0) * x.get('height', 0), reverse=True)
387
- final_images = valid_images[:50]
388
 
389
- return JSONResponse(content={
390
- "query": q,
391
- "min_width_filter": min_width,
392
- "total_found": len(final_images),
393
- "thumbnails_included": include_thumbnails,
394
- "images": final_images
395
- })
396
-
397
- except httpx.TimeoutException:
398
- raise HTTPException(status_code=408, detail="Timeout na requisição")
399
  except Exception as e:
400
- raise HTTPException(status_code=500, detail=f"Erro: {str(e)}")
 
 
 
401
 
 
402
  @router.get("/thumbnail")
403
  async def get_thumbnail_fast(
404
- url: str = Query(..., description="URL da imagem"),
405
- size: int = Query(150, description="Tamanho da miniatura")
406
  ):
407
- """Gera miniatura ultra-rápida"""
 
 
408
  try:
409
- semaphore = asyncio.Semaphore(1)
410
- result = await _process_image_ultra_fast(url, True, semaphore)
411
-
412
- if result.get('thumbnail'):
413
- return JSONResponse(content={
414
- "url": result['url'],
415
- "thumbnail": result['thumbnail'],
416
- "dimensions": f"{result.get('width', 0)}x{result.get('height', 0)}"
417
- })
418
- else:
419
- raise HTTPException(status_code=500, detail="Erro ao criar miniatura")
420
 
 
 
 
 
 
 
 
 
 
 
421
  except Exception as e:
422
  raise HTTPException(status_code=500, detail=f"Erro: {str(e)}")
423
 
424
- # Cleanup
 
425
  import atexit
426
- atexit.register(lambda: _process_pool.shutdown(wait=False) if _process_pool else None)
 
12
  from typing import Optional, Tuple, List, Dict
13
  import base64
14
  from functools import lru_cache
15
+ import aiofiles
16
+ from concurrent.futures import ThreadPoolExecutor
17
  import time
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
router = APIRouter()

# Thread pool dedicated to CPU-bound thumbnail encoding. Sized like
# asyncio's default executor: min(32, cpu_count + 4).
# NOTE(review): relies on `os` being imported near the top of the file
# (not visible in this chunk) — confirm.
thumbnail_executor = ThreadPoolExecutor(
    max_workers=min(32, (os.cpu_count() or 1) + 4),
    thread_name_prefix="thumbnail_",
)

# In-memory memo of already-processed image URLs (bounded, never evicted).
_url_cache = {}
_cache_max_size = 1000
 
 
30
 
31
+ @router.get("/search")
32
+ async def search(
33
+ q: str = Query(..., description="Termo de pesquisa para imagens"),
34
+ min_width: int = Query(1200, description="Largura mínima das imagens (padrão: 1200px)"),
35
+ include_thumbnails: bool = Query(True, description="Incluir miniaturas base64 nas respostas")
36
+ ):
37
+ """
38
+ Busca imagens no Google Imagens com máxima performance
39
+ """
40
+ start_time = time.time()
41
+
42
+ # URL do Google Imagens com parâmetros para imagens grandes
43
+ google_images_url = "http://www.google.com/search"
44
+
45
+ params = {
46
+ "tbm": "isch",
47
+ "q": q,
48
+ "start": 0,
49
+ "sa": "N",
50
+ "asearch": "arc",
51
+ "cs": "1",
52
+ "tbs": "isz:l",
53
+ "async": f"arc_id:srp_GgSMaOPQOtL_5OUPvbSTOQ_110,ffilt:all,ve_name:MoreResultsContainer,inf:1,_id:arc-srp_GgSMaOPQOtL_5OUPvbSTOQ_110,_pms:s,_fmt:pc"
54
+ }
55
+
56
+ headers = {
57
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
58
+ "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
59
+ "Accept-Language": "pt-BR,pt;q=0.8,en-US;q=0.5,en;q=0.3",
60
+ "Accept-Encoding": "gzip, deflate",
61
+ "Connection": "keep-alive",
62
+ "Referer": "https://www.google.com/"
63
+ }
64
 
65
  try:
66
+ # Busca no Google (rápida)
67
+ async with httpx.AsyncClient(timeout=30.0) as client:
68
+ response = await client.get(google_images_url, params=params, headers=headers)
69
 
70
+ if response.status_code != 200:
71
+ raise HTTPException(status_code=response.status_code, detail="Erro ao buscar no Google Imagens")
 
 
72
 
73
+ # Extração otimizada
74
+ images = extract_images_from_response_optimized(response.text)
 
 
 
 
75
 
76
+ # Processamento paralelo massivo
77
+ enriched_images = await enrich_images_ultra_fast(images, include_thumbnails)
78
+
79
+ # Filtragem rápida
80
+ valid_images = [
81
+ img for img in enriched_images
82
+ if img.get('width', 0) >= min_width and img.get('height', 0) > 0
83
+ ]
84
+
85
+ # Se poucos resultados, busca adicional em paralelo
86
+ if len(valid_images) < 20:
87
+ params["tbs"] = "isz:lt,islt:4mp"
88
+
89
+ async with httpx.AsyncClient(timeout=30.0) as client:
90
+ response2 = await client.get(google_images_url, params=params, headers=headers)
91
+
92
+ if response2.status_code == 200:
93
+ additional_images = extract_images_from_response_optimized(response2.text)
94
+ additional_enriched = await enrich_images_ultra_fast(additional_images, include_thumbnails)
95
+
96
+ # Merge rápido com set para deduplicação
97
+ seen_urls = {img.get('url') for img in valid_images}
98
+ for img in additional_enriched:
99
+ if (img.get('url') not in seen_urls
100
+ and img.get('width', 0) >= min_width
101
+ and img.get('height', 0) > 0):
102
+ valid_images.append(img)
103
+ seen_urls.add(img.get('url'))
104
+
105
+ # Ordenação e limitação
106
+ valid_images.sort(key=lambda x: x.get('width', 0), reverse=True)
107
+ final_images = valid_images[:50]
108
+
109
+ total_time = time.time() - start_time
110
+
111
+ return JSONResponse(content={
112
+ "query": q,
113
+ "min_width_filter": min_width,
114
+ "total_found": len(final_images),
115
+ "thumbnails_included": include_thumbnails,
116
+ "processing_time": round(total_time, 2),
117
+ "images": final_images
118
+ })
119
+
120
+ except httpx.TimeoutException:
121
+ raise HTTPException(status_code=408, detail="Timeout na requisição ao Google")
122
+ except Exception as e:
123
+ raise HTTPException(status_code=500, detail=f"Erro ao executar a busca: {str(e)}")
124
+
125
+
126
@lru_cache(maxsize=500)
def clean_wikimedia_url_cached(url: str) -> str:
    """Turn a Wikimedia */thumb/* scaled-image URL into the original-file URL.

    Example:
        .../commons/thumb/a/ab/X.jpg/640px-X.jpg -> .../commons/a/ab/X.jpg

    Any URL that is not a Wikimedia thumb URL (or contains '/thumb/' more
    than once) is returned unchanged. Results are LRU-cached.

    Fix: removed the bare ``except:`` — none of the string operations here
    can raise, and a bare except would also swallow KeyboardInterrupt.
    """
    if 'wikimedia.org' in url and '/thumb/' in url:
        parts = url.split('/thumb/')
        if len(parts) == 2:
            before_thumb, after_thumb = parts
            path_parts = after_thumb.split('/')
            # First three segments after /thumb/ are hash/hash/filename.
            if len(path_parts) >= 3:
                return f"{before_thumb}/{'/'.join(path_parts[:3])}"
    return url
145
 
 
 
 
 
 
146
 
147
# Compiled once at import time; the previous revision re-ran re.compile()
# on every call despite its own "compiled (faster)" comment.
_IMAGE_URL_RE = re.compile(r'https?://[^\s"\'<>]+?\.(?:jpg|png|webp|jpeg)\b', re.IGNORECASE)


def extract_images_from_response_optimized(response_text: str) -> List[Dict]:
    """Extract candidate image URLs from a Google Images HTML response.

    Returns a list of ``{"url": ..., "width": None, "height": None}`` dicts
    (dimensions are filled in later by the enrichment step), deduplicated,
    considering at most the first 200 raw regex matches.
    """
    seen_urls = set()
    images = []

    for url in _IMAGE_URL_RE.findall(response_text)[:200]:  # cap to bound work
        cleaned_url = clean_wikimedia_url_cached(url)
        if cleaned_url not in seen_urls:
            seen_urls.add(cleaned_url)
            images.append({"url": cleaned_url, "width": None, "height": None})

    return images
169
 
 
 
 
 
170
 
171
def get_image_size_super_fast(data: bytes) -> Optional[Tuple[int, int]]:
    """Parse (width, height) from the leading bytes of a JPEG/PNG/WebP stream.

    Designed to work on partial downloads (HTTP Range responses). Returns
    ``None`` when the format is unrecognized or the header is truncated.

    Fixes: the WebP signature test was the convoluted
    ``data[:12] == b'RIFF' + data[4:8] + b'WEBP'`` (the middle four bytes
    always compare equal to themselves) — rewritten as the equivalent
    explicit RIFF/WEBP tag checks; the bare ``except:`` is narrowed to
    ``struct.error``.
    """
    if len(data) < 24:
        return None

    try:
        # JPEG: scan for an SOF0/SOF2 marker within the first ~1 KB.
        if data[:2] == b'\xff\xd8':
            for i in range(2, min(len(data) - 8, 1000)):
                if data[i:i + 2] in (b'\xff\xc0', b'\xff\xc2'):
                    if i + 9 <= len(data):
                        # SOF layout: marker, length(2), precision(1), height(2), width(2)
                        height = struct.unpack('>H', data[i + 5:i + 7])[0]
                        width = struct.unpack('>H', data[i + 7:i + 9])[0]
                        if width > 0 and height > 0:
                            return width, height

        # PNG: IHDR width/height live at fixed offsets 16 and 20.
        elif data[:8] == b'\x89PNG\r\n\x1a\n':
            width = struct.unpack('>I', data[16:20])[0]
            height = struct.unpack('>I', data[20:24])[0]
            if width > 0 and height > 0:
                return width, height

        # WebP: RIFF container with 'WEBP' tag; only lossy VP8 is handled.
        elif data[:4] == b'RIFF' and data[8:12] == b'WEBP' and len(data) >= 30:
            if data[12:16] == b'VP8 ':
                width = struct.unpack('<H', data[26:28])[0] & 0x3fff
                height = struct.unpack('<H', data[28:30])[0] & 0x3fff
                if width > 0 and height > 0:
                    return width, height
    except struct.error:
        pass

    return None
208
 
209
+
210
def create_thumbnail_cpu_optimized(image_data: bytes, max_size: int = 200) -> Optional[str]:
    """Build a JPEG thumbnail (longest side <= ``max_size``) as a base64 data URI.

    Intended to run inside the thumbnail thread pool. Returns ``None`` for
    empty/tiny payloads or on any decode/encode failure (best-effort).

    Fix: palette ('P') images are promoted to RGBA before the white-background
    flatten, so palette transparency is preserved — the previous revision sent
    them through a plain RGB conversion, losing the alpha handling that the
    pre-refactor code had.
    """
    if not image_data or len(image_data) < 100:
        return None

    try:
        with Image.open(io.BytesIO(image_data)) as image:
            if image.mode != 'RGB':
                # Promote palette images so their transparency survives.
                if image.mode == 'P':
                    image = image.convert('RGBA')
                if image.mode in ('RGBA', 'LA'):
                    # Flatten transparency onto a white background.
                    bg = Image.new('RGB', image.size, (255, 255, 255))
                    bg.paste(image, mask=image.split()[-1] if 'A' in image.mode else None)
                    image = bg
                else:
                    image = image.convert('RGB')

            # Integer-math aspect-preserving target size (never 0).
            w, h = image.size
            if w > h:
                new_w, new_h = max_size, max(1, (h * max_size) // w)
            else:
                new_w, new_h = max(1, (w * max_size) // h), max_size

            # BILINEAR is fast and good enough at thumbnail scale.
            thumbnail = image.resize((new_w, new_h), Image.Resampling.BILINEAR)

            buffer = io.BytesIO()
            # optimize=False trades a few output bytes for encode speed.
            thumbnail.save(buffer, format='JPEG', quality=80, optimize=False)

            return f"data:image/jpeg;base64,{base64.b64encode(buffer.getvalue()).decode('utf-8')}"

    except Exception:
        return None
248
 
 
 
 
 
 
249
 
250
async def download_and_process_image(session: httpx.AsyncClient, url: str, include_thumbnail: bool) -> Dict:
    """Fetch one image and return ``{"url", "width", "height"[, "thumbnail"]}``.

    Strategy: probe with incremental HTTP Range requests (header bytes are
    usually enough to parse dimensions), then fall back to a full download
    capped at 2 MB. Thumbnails are encoded in the thumbnail thread pool.
    Results are memoized in the module-level ``_url_cache``.

    Fixes: bare ``except:`` clauses (which also swallow
    ``asyncio.CancelledError``, breaking task cancellation) are narrowed to
    ``Exception``; deprecated ``asyncio.get_event_loop()`` replaced with
    ``asyncio.get_running_loop()``.
    """
    # Memoized result?
    cache_key = f"{url}_{include_thumbnail}"
    if cache_key in _url_cache:
        return _url_cache[cache_key].copy()

    # Undo JSON escaping left over from the scraped Google response.
    clean_url = url.replace('\\u003d', '=').replace('\\u0026', '&').replace('\\\\', '').replace('\\/', '/')

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
        'Accept': 'image/*',
        'Connection': 'close'
    }

    width, height, thumbnail_b64 = None, None, None

    try:
        # Small probe when only dimensions are needed; larger increments
        # when thumbnail bytes must also be decoded.
        ranges = ['0-8192', '0-32768', '0-131072'] if include_thumbnail else ['0-2048']

        for range_header in ranges:
            headers['Range'] = f'bytes={range_header}'
            try:
                response = await session.get(clean_url, headers=headers, timeout=6.0)
                if response.status_code in [200, 206] and len(response.content) > 100:

                    if not width or not height:
                        dimensions = get_image_size_super_fast(response.content)
                        if dimensions:
                            width, height = dimensions

                    if include_thumbnail and not thumbnail_b64:
                        loop = asyncio.get_running_loop()
                        thumbnail_b64 = await loop.run_in_executor(
                            thumbnail_executor,
                            create_thumbnail_cpu_optimized,
                            response.content
                        )

                    # Everything resolved — no need for a bigger range.
                    if width and height and (not include_thumbnail or thumbnail_b64):
                        break

            except Exception:
                continue  # this range failed; try the next size

        # Final fallback: full download when partial reads were not enough.
        if not width or not height or (include_thumbnail and not thumbnail_b64):
            try:
                headers.pop('Range', None)
                response = await session.get(clean_url, headers=headers, timeout=8.0)
                if response.status_code == 200 and len(response.content) < 2000000:  # max 2 MB

                    if not width or not height:
                        try:
                            with Image.open(io.BytesIO(response.content)) as img:
                                width, height = img.size
                        except Exception:
                            pass

                    if include_thumbnail and not thumbnail_b64:
                        loop = asyncio.get_running_loop()
                        thumbnail_b64 = await loop.run_in_executor(
                            thumbnail_executor,
                            create_thumbnail_cpu_optimized,
                            response.content
                        )
            except Exception:
                pass

    except Exception:
        pass  # best-effort: an unreachable image simply yields width=None

    result = {
        "url": clean_url,
        "width": width,
        "height": height
    }

    if include_thumbnail:
        result["thumbnail"] = thumbnail_b64

    # Memoize (cache is bounded but never evicted).
    if len(_url_cache) < _cache_max_size:
        _url_cache[cache_key] = result.copy()

    return result
343
+
344
+
345
async def enrich_images_ultra_fast(images: List[Dict], include_thumbnails: bool = True) -> List[Dict]:
    """Concurrently resolve dimensions (and optional thumbnails) for images.

    Fans all downloads out through one shared client with a large connection
    pool, limited by a semaphore. Returns only entries whose width and
    height could be determined. On any unexpected failure the established
    best-effort contract is kept: an empty list, never an exception.

    Fixes: local formerly named ``connector`` renamed to ``client`` (it is
    an ``httpx.AsyncClient``, not a connector) and the comment claiming an
    "HTTP2 configuration" corrected — ``http2=False`` is deliberate.
    """
    if not images:
        return []

    # One shared HTTP/1.1 client; many small downloads to many hosts gain
    # nothing from HTTP/2 multiplexing here.
    client = httpx.AsyncClient(
        timeout=httpx.Timeout(10.0),
        limits=httpx.Limits(
            max_keepalive_connections=100,
            max_connections=150,
            keepalive_expiry=30.0
        ),
        http2=False
    )

    # Cap concurrent downloads.
    semaphore = asyncio.Semaphore(30)

    async def process_single_image(image_data):
        async with semaphore:
            return await download_and_process_image(client, image_data["url"], include_thumbnails)

    try:
        tasks = [process_single_image(img) for img in images]
        results = await asyncio.gather(*tasks, return_exceptions=True)

        # Keep only fully-resolved images.
        return [
            result for result in results
            if not isinstance(result, Exception) and result.get('width') and result.get('height')
        ]

    except Exception:
        # Best-effort: swallow and return nothing rather than failing /search.
        return []
    finally:
        await client.aclose()
389
+
390
 
391
# Additional optimized endpoint
@router.get("/thumbnail")
async def get_thumbnail_fast(
    url: str = Query(..., description="URL da imagem para gerar miniatura"),
    size: int = Query(200, description="Tamanho máximo da miniatura em pixels")
):
    """Return a base64 thumbnail for a single image URL.

    Fix: the deliberate 500 "Erro ao criar miniatura" is re-raised as-is
    instead of being caught by the generic handler and rewrapped with a
    different message.

    NOTE(review): ``size`` is echoed in the response but never forwarded to
    the thumbnail generator (which uses its own max_size=200 default) —
    confirm whether it should be wired through.
    """
    try:
        async with httpx.AsyncClient(timeout=8.0) as client:
            result = await download_and_process_image(client, url, True)

        if result.get('thumbnail'):
            return JSONResponse(content={
                "url": result['url'],
                "thumbnail": result['thumbnail'],
                "dimensions": f"{result.get('width', 0)}x{result.get('height', 0)}",
                "size": size
            })
        raise HTTPException(status_code=500, detail="Erro ao criar miniatura")

    except HTTPException:
        raise  # keep the specific detail message
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Erro: {str(e)}")
416
 
417
+
418
import atexit


def _shutdown_thumbnail_pool():
    # Don't block interpreter exit on in-flight thumbnail jobs.
    thumbnail_executor.shutdown(wait=False)


# Executor cleanup at interpreter shutdown.
atexit.register(_shutdown_thumbnail_pool)