pragmatic-agent / modules /processing /query_processor.py
m97j's picture
First codes update
69c12a2
# app/modules/processing/query_processor.py
import re
from typing import List
def normalize_query(query: str) -> str:
    """
    Return ``query`` with surrounding whitespace removed.

    Only leading and trailing whitespace is touched; the text in between
    is returned unchanged. This is the single place to add further rules
    (punctuation, casing, ...) should they become necessary.
    """
    trimmed = query.strip()
    return trimmed
def deduplicate_queries(queries: List[str]) -> List[str]:
    """
    Drop repeated queries, keeping the first occurrence of each.

    Queries are compared by exact equality, so strings that differ only
    in case or whitespace are treated as distinct. The relative order of
    the surviving queries is preserved.
    """
    # dict keys are unique and remember insertion order, which gives an
    # order-preserving de-duplication in a single pass.
    return list(dict.fromkeys(queries))
def select_top_k_queries(queries: List[str], reranker, k: int = 5) -> List[str]:
    """
    Select the ``k`` highest-scoring queries according to ``reranker``.

    - Keep broad coverage while reducing redundant API calls
    - Reranker is supplied by the caller (function or other callable)

    Args:
        queries: Candidate query strings.
        reranker: Callable invoked once as ``reranker(queries)``. It must
            return an iterable of ``(query, score)`` pairs; any iterable
            works (list, tuple, generator) and it is never modified.
        k: Maximum number of queries to return. Values <= 0 yield ``[]``.

    Returns:
        Up to ``k`` queries ordered by descending score. Pairs with equal
        scores keep the order in which the reranker produced them.
    """
    if not queries or k <= 0:
        return []

    # sorted() builds a new list, so the reranker's own result object
    # (which may be cached or shared) is left untouched.
    ranked = sorted(reranker(queries), key=lambda pair: pair[1], reverse=True)
    return [query for query, _ in ranked[:k]]
# Leading list markers commonly emitted by LLMs: "1." / "2)" numbering and
# "-", "*", "•" bullets, possibly stacked ("- 1. foo").
# A number only counts as a marker when it is followed by "." or ")" and that
# delimiter is not followed by another digit, so "2024 election" and
# "3.5 inch screen" keep their digits. A bullet must be followed by
# whitespace (or end of line), so "-5 degrees" and "**bold**" are left alone.
_LIST_MARKER_RE = re.compile(
    r"^\s*(?:(?:\d+[.)]+(?!\d)|[-*\u2022]+(?=\s|$))\s*)+"
)


def process_queries(raw_output: str, max_queries: int = 5) -> List[str]:
    """
    Entry point: process raw LLM output into final queries.

    - Split by line
    - Remove leading numbering or bullet markers
    - Normalize each query and drop lines that end up empty
    - Deduplicate identical queries (first occurrence wins)
    - Limit to max_queries

    De-duplication happens before the limit is applied, so repeated lines
    never use up slots that later, distinct queries could fill.

    Args:
        raw_output: Text with one candidate query per line.
        max_queries: Maximum number of queries to return. Values <= 0
            yield an empty list.

    Returns:
        At most ``max_queries`` unique queries, in order of first appearance.
    """
    if not raw_output or max_queries <= 0:
        return []

    cleaned = (
        normalize_query(_LIST_MARKER_RE.sub("", line.strip()))
        for line in raw_output.splitlines()
    )
    unique = deduplicate_queries([q for q in cleaned if q])
    return unique[:max_queries]