File size: 2,975 Bytes
4efde5d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
"""
Query utilities for handling large datasets and avoiding URI length limits.
"""
from typing import List, Any, Dict, Optional
from utils.logger import logger


def _apply_suffix_filters(query, additional_filters: Optional[Dict[str, Any]]):
    """
    Apply additional filters to a query using a key-suffix convention.

    Keys ending in '_gte' become a .gte() filter on the stripped field name,
    keys ending in '_eq' become an .eq() filter on the stripped field name,
    and any other key becomes a plain .eq() filter on the key itself.

    NOTE: a real column literally named e.g. 'status_eq' cannot be filtered
    with this convention — its suffix would be stripped.
    """
    for field, value in (additional_filters or {}).items():
        if field.endswith('_gte'):
            query = query.gte(field[:-4], value)  # strip '_gte'
        elif field.endswith('_eq'):
            query = query.eq(field[:-3], value)   # strip '_eq'
        else:
            query = query.eq(field, value)
    return query


async def batch_query_in(
    client,
    table_name: str,
    select_fields: str,
    in_field: str,
    in_values: List[Any],
    batch_size: int = 100,
    additional_filters: Optional[Dict[str, Any]] = None,
    schema: Optional[str] = None
) -> List[Dict[str, Any]]:
    """
    Execute a query with .in_() filtering, automatically batching large arrays to avoid URI limits.
    
    Args:
        client: Supabase client
        table_name: Name of the table to query
        select_fields: Fields to select (e.g., '*' or 'id, name, created_at')
        in_field: Field name for the .in_() filter
        in_values: List of values to filter by
        batch_size: Maximum number of values per batch (default: 100)
        additional_filters: Optional dict of additional filters to apply; keys
            ending in '_gte'/'_eq' are treated as range/equality filters on the
            stripped field name (see _apply_suffix_filters)
        schema: Optional schema name (for basejump tables)
    
    Returns:
        List of all matching records from all batches, concatenated in batch order.
    """
    if not in_values:
        return []

    # Only log when we actually batch, matching the original single-query
    # path which was silent.
    batched = len(in_values) > batch_size
    if batched:
        logger.debug(f"Batching {len(in_values)} {in_field} values into chunks of {batch_size}")

    all_results: List[Dict[str, Any]] = []

    # A small input simply yields a single iteration of this loop.
    for i in range(0, len(in_values), batch_size):
        batch_values = in_values[i:i + batch_size]

        # Schema-qualified tables (e.g. basejump) need schema().from_();
        # otherwise the plain table() accessor is used.
        query = client.schema(schema).from_(table_name) if schema else client.table(table_name)
        query = query.select(select_fields).in_(in_field, batch_values)
        query = _apply_suffix_filters(query, additional_filters)

        batch_result = await query.execute()
        if batch_result.data:
            all_results.extend(batch_result.data)

    if batched:
        logger.debug(f"Batched query returned {len(all_results)} total results")
    return all_results