# Tools/Modules/_query_optimizer.py
# NOTE(review): the original paste carried Hugging Face web-page residue
# ("raw / history blame / 29 kB") above the module docstring; replaced with
# this comment so the file remains valid Python.
"""
Query Optimizer Module with Self-Consistency Chain-of-Thought (SC-CoT).
Optimizes search queries using AI-generated candidate scoring with a fallback chain:
1. Mistral API (magistral-medium-2509) - Primary
2. HuggingFace Inference (openai/gpt-oss-20b:cheapest) - Fallback
3. Bypass (return raw query) - Final fallback
Two optimization modes:
- optimize_for_search_engine(): Boolean operators, site:, filetype:, exact phrases
- optimize_for_ai_search(): Clear intent, context, specific questions
"""
from __future__ import annotations
import json
import os
import re
from typing import Annotated, Any, Literal
import gradio as gr
from pydantic import BaseModel, Field
from app import _log_call_end, _log_call_start, _truncate_for_log
from ._docstrings import autodoc
# ===========================================================================
# Pydantic Schemas for Structured Output
# ===========================================================================
class OptimizedCandidate(BaseModel):
    """A single optimized query candidate with reasoning."""

    # 1-based version number; the selector phase refers to candidates by it.
    version: int = Field(description="Candidate version number (1-based)")
    # The rewritten query produced by the generation phase.
    optimized_query: str = Field(description="The optimized query string")
    # Human-readable explanation of each optimization applied to this candidate.
    reasoning: list[str] = Field(description="List of reasoning steps explaining optimizations")
class GenerationOutput(BaseModel):
    """Output from candidate generation phase."""

    # Echo of the query the candidates were generated from.
    original_query: str = Field(description="The original user query")
    # All candidates from one generation call (the prompts request 5).
    candidates: list[OptimizedCandidate] = Field(description="List of generated candidates")
class ScoringOutput(BaseModel):
    """Output from candidate selection phase."""

    # 1-indexed version of the winning candidate; matches
    # OptimizedCandidate.version.
    selected_version: int = Field(description="Version number of the best candidate")
# ===========================================================================
# Core Query Optimizer Class
# ===========================================================================
class QueryOptimizer:
    """
    Self-Consistency Chain-of-Thought query optimizer.

    Generates multiple optimized candidates and selects the best one through
    self-consistency scoring. Implements a fallback chain for reliability.
    """

    # Few-shot examples for search engine optimization.
    # Every candidate line uses the uniform shape
    #     N. ("query", "reasoning")
    # FIX: several examples were malformed (missing opening parenthesis or
    # quote), which undermines few-shot imitation — the model copies the
    # format it sees. All candidate lines are now consistently formatted.
    _SEARCH_ENGINE_EXAMPLES = """
Example 1:
Input: python fastapi performance
Candidates:
1. ("python fastapi performance", "Direct query covers main concepts")
2. ("fastapi performance optimization python", "Added 'optimization' for more specific results")
3. ("site:stackoverflow.com fastapi performance python", "Targeted technical Q&A for performance issues")
4. ("fastapi async performance benchmark", "Added 'async' and 'benchmark' for technical depth")
5. ("fastapi OR flask performance python", "Added comparison with Flask for broader context")
Example 2:
Input: climate change effects on agriculture
Candidates:
1. ("climate change effects on agriculture", "Clear and comprehensive query")
2. ("site:nature.com OR site:science.org climate change agriculture", "Targeted reputable scientific sources")
3. ("\\"climate change\\" AND agriculture filetype:pdf", "Using exact phrase match and PDF filter for research papers")
4. ("climate change impact crop yield 2023..2024", "Added temporal filter and specific terminology")
5. ("agricultural adaptation climate change strategies", "Rephrased to focus on solutions")
Example 3:
Input: machine learning tutorial python
Candidates:
1. ("python machine learning tutorial", "Reordered for better SEO")
2. ("site:youtube.com python machine learning tutorial", "Targeted video tutorials")
3. ("python machine learning tutorial filetype:pdf", "Focus on PDF documentation")
4. ("machine learning python sklearn tutorial", "Added popular library 'sklearn' for relevance")
5. ("\\"machine learning\\" AND python AND tutorial", "Using boolean operators for precision")
Example 4:
Input: react native vs flutter
Candidates:
1. ("react native vs flutter comparison", "Added 'comparison' for explicit intent")
2. ("site:reddit.com \\"react native\\" flutter", "Targeted community discussions")
3. ("\\"react native\\" OR flutter mobile development", "Broader search for mobile frameworks")
4. ("react native flutter performance benchmark", "Focus on technical comparison")
5. ("flutter vs react native 2024", "Added year for current information")
Example 5:
Input: best restaurants in tokyo
Candidates:
1. ("best restaurants tokyo", "Simplified for broad search")
2. ("site:michelin.com Tokyo restaurants", "Targeted Michelin guide sources")
3. ("Tokyo restaurant guide 2024", "Added temporal context")
4. ("\\"best restaurants\\" AND tokyo AND review", "Boolean operators for precision")
5. ("tokyo food guide michelin OR local", "Added 'local' for authentic recommendations")
"""

    # Few-shot examples for AI search optimization (same uniform format;
    # example 4 candidate 4 was missing its opening quote — fixed).
    _AI_SEARCH_EXAMPLES = """
Example 1:
Input: python fastapi performance
Candidates:
1. ("What are the performance characteristics of FastAPI in Python, and how does it compare to other web frameworks?", "Added comparison context and framework focus")
2. ("Explain the key performance optimization techniques for FastAPI applications in Python.", "Focused on actionable optimization strategies")
3. ("How does FastAPI's async/await model impact performance compared to synchronous frameworks?", "Targeted technical architectural question")
4. ("What are the benchmarks and real-world performance metrics for FastAPI in production environments?", "Asked for empirical data")
5. ("How can I identify and resolve performance bottlenecks in FastAPI applications?", "Problem-solving focused")
Example 2:
Input: climate change effects on agriculture
Candidates:
1. ("What are the primary impacts of climate change on global agricultural productivity and crop yields?", "Comprehensive question covering direct effects")
2. ("How is climate change affecting different agricultural regions around the world?", "Geographic focus")
3. ("What adaptation strategies are farmers using to cope with climate change impacts?", "Solution-oriented focus")
4. ("What scientific evidence exists linking climate change to agricultural changes?", "Evidence-based inquiry")
5. ("How will climate change affect food security and agricultural sustainability by 2050?", "Temporal and sustainability focus")
Example 3:
Input: react native vs flutter
Candidates:
1. ("What are the key differences between React Native and Flutter in terms of performance, development experience, and ecosystem?", "Comprehensive comparison framework")
2. ("Which cross-platform mobile framework is better suited for startup applications: React Native or Flutter?", "Use-case specific question")
3. ("How do React Native and Flutter compare in terms of learning curve, community support, and hiring availability?", "Practical development considerations")
4. ("What are the long-term maintenance implications of choosing React Native vs Flutter?", "Strategic business question")
5. ("Which framework provides better native performance and access to device features: React Native or Flutter?", "Technical performance focus")
Example 4:
Input: machine learning tutorial python
Candidates:
1. ("What is the best learning path for getting started with machine learning using Python?", "Learning path focused question")
2. ("Can you recommend a comprehensive Python machine learning tutorial for beginners?", "Resource-seeking question")
3. ("What are the essential Python libraries and tools for implementing machine learning algorithms?", "Tool ecosystem question")
4. ("How can I build my first machine learning model in Python from scratch?", "hands-on implementation focus")
5. ("What are the common pitfalls and best practices for learning machine learning with Python?", "Learning guidance question")
Example 5:
Input: quantum computing explained
Candidates:
1. ("Can you explain quantum computing in simple terms for someone without a physics background?", "Accessible explanation request")
2. ("What are the fundamental principles of quantum computing and how do they differ from classical computing?", "Conceptual comparison question")
3. ("What are the practical applications of quantum computing and when might they become viable?", "Real-world impact question")
4. ("How do qubits work and why do they enable quantum computational advantages?", "Technical explanation question")
5. ("What are the current limitations and challenges in developing practical quantum computers?", "Critical analysis question")
"""

    # Prompt used by the self-consistency selector phase; the candidates
    # JSON is appended below this text at call time.
    _SELECTOR_PROMPT = """
Given the original query and multiple optimized candidates, select the best one.
Criteria for selection:
- Relevance: Most accurately captures the user's intent
- Precision: Will return the most relevant results
- Completeness: Covers all important aspects of the query
- Clarity: Easy to understand and well-structured
Return only the version number of the best candidate (1-indexed).
"""
    def __init__(self) -> None:
        """Initialize the query optimizer with API clients."""
        # Credentials come from the environment. A missing key makes the
        # corresponding _*_generate helper raise, which the fallback chain
        # treats as "provider unavailable" and moves past.
        self._mistral_api_key: str | None = os.getenv("MISTRAL_API_KEY")
        self._hf_token: str | None = os.getenv("HF_TOKEN")
        # Primary (Mistral) and fallback (HF router) model identifiers.
        self._mistral_model: str = "magistral-medium-2509"
        self._hf_model: str = "openai/gpt-oss-20b:cheapest"
        # OpenAI-compatible chat-completions base URL on the HF router.
        self._hf_endpoint: str = "https://router.huggingface.co/v1"
def _mistral_generate(
self, prompt: str, response_format: dict[str, Any]
) -> str:
"""Generate structured output using Mistral API with response_format."""
if not self._mistral_api_key:
raise ValueError("MISTRAL_API_KEY not set")
import httpx
messages = [
{
"role": "user",
"content": prompt,
}
]
payload = {
"model": self._mistral_model,
"messages": messages,
"response_format": response_format,
"max_tokens": 2000,
"temperature": 0.3,
}
headers = {
"Authorization": f"Bearer {self._mistral_api_key}",
"Content-Type": "application/json",
}
response = httpx.post(
"https://api.mistral.ai/v1/chat/completions",
json=payload,
headers=headers,
timeout=30.0,
)
response.raise_for_status()
result = response.json()
if "choices" not in result or not result["choices"]:
raise ValueError("Invalid Mistral API response: no choices")
return result["choices"][0]["message"]["content"]
def _hf_generate(self, prompt: str) -> str:
"""Generate output using HuggingFace Inference API."""
if not self._hf_token:
raise ValueError("HF_TOKEN not set")
import httpx
payload = {
"model": self._hf_model,
"messages": [
{
"role": "user",
"content": prompt,
}
],
"max_tokens": 2000,
"temperature": 0.3,
}
headers = {
"Authorization": f"Bearer {self._hf_token}",
"Content-Type": "application/json",
}
response = httpx.post(
f"{self._hf_endpoint}/chat/completions",
json=payload,
headers=headers,
timeout=30.0,
)
response.raise_for_status()
result = response.json()
if "choices" not in result or not result["choices"]:
raise ValueError("Invalid HF API response: no choices")
return result["choices"][0]["message"]["content"]
def _extract_json_from_response(self, response: str) -> str:
"""Extract JSON from a response that may have markdown formatting."""
# Try to find JSON between ```json and ``` or between ``` and ```
patterns = [
r"```json\s*([\s\S]*?)\s*```",
r"```\s*([\s\S]*?)\s*```",
r"(\{[\s\S]*\})",
]
for pattern in patterns:
match = re.search(pattern, response.strip())
if match:
return match.group(1).strip()
return response.strip()
def _optimize_search_engine_mistral(self, query: str) -> str:
"""Optimize for search engines using Mistral API."""
prompt = f"""Generate 5 optimized versions of the following search query for traditional search engines (DuckDuckGo, Google, etc.).
Optimization techniques to use:
- Add boolean operators (AND, OR, NOT)
- Use site: to target specific domains
- Use filetype: to filter by document type
- Use exact phrases with quotes
- Add relevant keywords for precision
- Include temporal filters when appropriate
- Target reputable sources (Wikipedia, StackOverflow, GitHub, etc.)
{self._SEARCH_ENGINE_EXAMPLES}
Original query: {query}
Generate candidates in the following JSON format:
{{
"original_query": "{query}",
"candidates": [
{{
"version": 1,
"optimized_query": "...",
"reasoning": ["...", "..."]
}},
...
]
}}
Return ONLY valid JSON, no markdown formatting."""
return self._mistral_generate(
prompt,
response_format={
"type": "json_schema",
"json_schema": GenerationOutput.model_json_schema(),
},
)
def _optimize_ai_search_mistral(self, query: str) -> str:
"""Optimize for AI search using Mistral API."""
prompt = f"""Generate 5 optimized versions of the following query for AI-powered search engines (Perplexity, Gemini Search, etc.).
Optimization techniques to use:
- Reframe as clear, specific questions
- Add context about what information is needed
- Include comparative or evaluative language when relevant
- Ask for explanations, examples, or step-by-step guides
- Include temporal context (current state, recent developments)
- Focus on actionable information or insights
{self._AI_SEARCH_EXAMPLES}
Original query: {query}
Generate candidates in the following JSON format:
{{
"original_query": "{query}",
"candidates": [
{{
"version": 1,
"optimized_query": "...",
"reasoning": ["...", "..."]
}},
...
]
}}
Return ONLY valid JSON, no markdown formatting."""
return self._mistral_generate(
prompt,
response_format={
"type": "json_schema",
"json_schema": GenerationOutput.model_json_schema(),
},
)
def _select_best_mistral(self, candidates_json: str) -> int:
"""Select best candidate using Mistral API."""
prompt = f"""{self._SELECTOR_PROMPT}
{candidates_json}
Return the version number (1-5) of the best candidate."""
response = self._mistral_generate(
prompt,
response_format={
"type": "json_schema",
"json_schema": ScoringOutput.model_json_schema(),
},
)
# Parse JSON response
json_str = self._extract_json_from_response(response)
result = json.loads(json_str)
return result["selected_version"]
def _optimize_search_engine_hf(self, query: str) -> str:
"""Optimize for search engines using HF Inference (fallback)."""
prompt = f"""Generate 5 optimized search query candidates. Return as JSON with format:
{{
"original_query": "...",
"candidates": [
{{"version": 1, "optimized_query": "...", "reasoning": ["..."]}},
...
]
}}
Query: {query}
Optimize with boolean operators, site:, filetype:, quotes for phrases, and relevant keywords."""
response = self._hf_generate(prompt)
return self._extract_json_from_response(response)
def _optimize_ai_search_hf(self, query: str) -> str:
"""Optimize for AI search using HF Inference (fallback)."""
prompt = f"""Generate 5 optimized query candidates for AI search. Return as JSON with format:
{{
"original_query": "...",
"candidates": [
{{"version": 1, "optimized_query": "...", "reasoning": ["..."]}},
...
]
}}
Query: {query}
Optimize as clear, specific questions with context and intent."""
response = self._hf_generate(prompt)
return self._extract_json_from_response(response)
def _select_best_hf(self, candidates_json: str) -> int:
"""Select best candidate using HF Inference (fallback)."""
prompt = f"""{self._SELECTOR_PROMPT}
{candidates_json}
Return only the number (1-5)."""
response = self._hf_generate(prompt)
# Try to extract number from response
match = re.search(r"\b([1-5])\b", response)
if match:
return int(match.group(1))
return 1 # Default to first candidate
def _parse_candidates(self, json_str: str, original_query: str) -> GenerationOutput:
"""Parse candidate JSON with fallback."""
try:
json_clean = self._extract_json_from_response(json_str)
return GenerationOutput.model_validate_json(json_clean)
except Exception:
# Fallback: create minimal candidate with original query
return GenerationOutput(
original_query=original_query,
candidates=[
OptimizedCandidate(
version=1,
optimized_query=original_query,
reasoning=["Fallback: using original query"],
)
],
)
def _run_optimization_chain(
self,
query: str,
mode: Literal["search_engine", "ai_search"],
) -> tuple[GenerationOutput, int, str]:
"""
Run optimization with fallback chain.
Returns:
(candidates, best_version, provider_used)
"""
provider = "bypass"
# Try Mistral API first
try:
if mode == "search_engine":
response = self._optimize_search_engine_mistral(query)
else:
response = self._optimize_ai_search_mistral(query)
candidates = self._parse_candidates(response, query)
best_version = self._select_best_mistral(response)
provider = "mistral"
return candidates, best_version, provider
except Exception as exc:
print(f"[QueryOptimizer] Mistral failed: {exc}", flush=True)
# Fallback to HF Inference
try:
if mode == "search_engine":
response = self._optimize_search_engine_hf(query)
else:
response = self._optimize_ai_search_hf(query)
candidates = self._parse_candidates(response, query)
best_version = self._select_best_hf(response)
provider = "hf"
return candidates, best_version, provider
except Exception as exc:
print(f"[QueryOptimizer] HF failed: {exc}", flush=True)
# Final bypass: return original query
candidates = GenerationOutput(
original_query=query,
candidates=[
OptimizedCandidate(
version=1,
optimized_query=query,
reasoning=["Bypass: using original query due to optimization failure"],
)
],
)
return candidates, 1, provider
def optimize_for_search_engine(self, query: str) -> tuple[str, dict[str, Any]]:
"""
Optimize query for traditional search engines.
Optimizes with boolean operators, site:, filetype:, exact phrases.
Args:
query: The original search query
Returns:
(optimized_query, metadata) tuple with metadata including:
- original_query: The input query
- all_candidates: List of all generated candidates
- reasoning: Reasoning for selected candidate
- provider: Which provider was used (mistral/hf/bypass)
"""
candidates, best_version, provider = self._run_optimization_chain(
query, "search_engine"
)
# Get selected candidate
selected = next(
(c for c in candidates.candidates if c.version == best_version),
candidates.candidates[0],
)
metadata = {
"original_query": candidates.original_query,
"all_candidates": [
{"version": c.version, "query": c.optimized_query}
for c in candidates.candidates
],
"reasoning": selected.reasoning,
"provider": provider,
}
return selected.optimized_query, metadata
def optimize_for_ai_search(self, query: str) -> tuple[str, dict[str, Any]]:
"""
Optimize query for AI-powered search engines.
Optimizes with clear intent, context, specific questions.
Args:
query: The original search query
Returns:
(optimized_query, metadata) tuple with metadata including:
- original_query: The input query
- all_candidates: List of all generated candidates
- reasoning: Reasoning for selected candidate
- provider: Which provider was used (mistral/hf/bypass)
"""
candidates, best_version, provider = self._run_optimization_chain(query, "ai_search")
# Get selected candidate
selected = next(
(c for c in candidates.candidates if c.version == best_version),
candidates.candidates[0],
)
metadata = {
"original_query": candidates.original_query,
"all_candidates": [
{"version": c.version, "query": c.optimized_query}
for c in candidates.candidates
],
"reasoning": selected.reasoning,
"provider": provider,
}
return selected.optimized_query, metadata
# Singleton instance for module-level caching
_optimizer_instance: QueryOptimizer | None = None


def get_optimizer() -> QueryOptimizer:
    """Get or create the singleton optimizer instance."""
    global _optimizer_instance
    # Lazy construction: the first call builds the optimizer (reading env
    # vars in __init__); every later call reuses the same instance.
    if _optimizer_instance is None:
        _optimizer_instance = QueryOptimizer()
    return _optimizer_instance
# ===========================================================================
# Gradio Tool Functions
# ===========================================================================
@autodoc(
    summary="Optimize a search query for traditional search engines using SC-CoT with fallback chain (Mistral → HF → bypass).",
)
def Optimize_for_Search_Engine(
    query: Annotated[str, "The search query to optimize."],
) -> str:
    """
    Optimize a query for traditional search engines (DuckDuckGo, Google, etc.).

    Uses Self-Consistency Chain-of-Thought with fallback chain:
    1. Mistral API (magistral-medium-2509) - Primary
    2. HuggingFace Inference - Fallback
    3. Bypass (return raw query) - Final fallback

    Optimization techniques:
    - Boolean operators (AND, OR, NOT)
    - site: for domain targeting
    - filetype: for document type filtering
    - Exact phrases with quotes
    - Relevant keywords for precision
    - Temporal filters when appropriate
    """
    _log_call_start("Optimize_for_Search_Engine", query=query)
    if not query or not query.strip():
        result = "No query provided. Please enter a search query to optimize."
        _log_call_end("Optimize_for_Search_Engine", _truncate_for_log(result))
        return result
    optimizer = get_optimizer()
    try:
        optimized, metadata = optimizer.optimize_for_search_engine(query)
        lines = [
            f"Original: {metadata['original_query']}",
            f"Optimized: {optimized}",
            f"Provider: {metadata['provider']}",
            "",
            "All candidates:",
        ]
        # FIX: the arrow previously marked the first listed candidate
        # unconditionally; mark the candidate that was actually selected
        # (the one whose query was returned as `optimized`).
        for candidate in metadata["all_candidates"]:
            prefix = "→" if candidate["query"] == optimized else " "
            lines.append(f"{prefix} {candidate['version']}. {candidate['query']}")
        lines.append("")
        lines.append("Reasoning:")
        lines.extend(f" • {step}" for step in metadata["reasoning"])
        result = "\n".join(lines)
        _log_call_end("Optimize_for_Search_Engine", _truncate_for_log(result))
        return result
    except Exception as exc:
        result = f"Optimization failed: {exc}"
        _log_call_end("Optimize_for_Search_Engine", _truncate_for_log(result))
        return result
@autodoc(
    summary="Optimize a search query for AI-powered search engines using SC-CoT with fallback chain (Mistral → HF → bypass).",
)
def Optimize_for_AI_Search(
    query: Annotated[str, "The search query to optimize."],
) -> str:
    """
    Optimize a query for AI-powered search engines (Perplexity, Gemini, etc.).

    Uses Self-Consistency Chain-of-Thought with fallback chain:
    1. Mistral API (magistral-medium-2509) - Primary
    2. HuggingFace Inference - Fallback
    3. Bypass (return raw query) - Final fallback

    Optimization techniques:
    - Clear, specific questions
    - Context about what information is needed
    - Comparative or evaluative language
    - Requests for explanations or examples
    - Temporal context (current state, recent developments)
    - Focus on actionable information
    """
    _log_call_start("Optimize_for_AI_Search", query=query)
    if not query or not query.strip():
        result = "No query provided. Please enter a search query to optimize."
        _log_call_end("Optimize_for_AI_Search", _truncate_for_log(result))
        return result
    optimizer = get_optimizer()
    try:
        optimized, metadata = optimizer.optimize_for_ai_search(query)
        lines = [
            f"Original: {metadata['original_query']}",
            f"Optimized: {optimized}",
            f"Provider: {metadata['provider']}",
            "",
            "All candidates:",
        ]
        # FIX: the arrow previously marked the first listed candidate
        # unconditionally; mark the candidate that was actually selected
        # (the one whose query was returned as `optimized`).
        for candidate in metadata["all_candidates"]:
            prefix = "→" if candidate["query"] == optimized else " "
            lines.append(f"{prefix} {candidate['version']}. {candidate['query']}")
        lines.append("")
        lines.append("Reasoning:")
        lines.extend(f" • {step}" for step in metadata["reasoning"])
        result = "\n".join(lines)
        _log_call_end("Optimize_for_AI_Search", _truncate_for_log(result))
        return result
    except Exception as exc:
        result = f"Optimization failed: {exc}"
        _log_call_end("Optimize_for_AI_Search", _truncate_for_log(result))
        return result
def build_interfaces() -> list[gr.Interface]:
    """
    Build Gradio interfaces for query optimizer tools.

    The two tools share identical scaffolding; a local builder removes the
    previous copy-paste duplication so the layouts cannot drift apart.
    """

    def _make_interface(
        fn: Any,
        target_label: str,
        title: str,
        detail: str,
        api_name: str,
    ) -> gr.Interface:
        # One optimizer tool panel; only the function, labels, description
        # detail, and API name differ between the two tools.
        return gr.Interface(
            fn=fn,
            inputs=[
                gr.Textbox(
                    label="Query",
                    placeholder="Enter your search query",
                    max_lines=1,
                    info=f"The search query to optimize for {target_label}",
                ),
            ],
            outputs=gr.Textbox(
                label="Optimization Results",
                interactive=False,
                lines=15,
                max_lines=20,
            ),
            title=title,
            description=(
                "<div style='text-align:center'>"
                f"Optimize queries for {target_label} using AI. "
                "Generates multiple candidates and selects the best one. "
                f"{detail}"
                "</div>"
            ),
            api_name=api_name,
            flagging_mode="never",
            submit_btn="Optimize",
        )

    return [
        _make_interface(
            Optimize_for_Search_Engine,
            "traditional search engines",
            "Query Optimizer (Search Engine)",
            "Optimizes with boolean operators, site:, filetype:, and precise keywords.",
            "optimize_for_search_engine",
        ),
        _make_interface(
            Optimize_for_AI_Search,
            "AI-powered search engines",
            "Query Optimizer (AI Search)",
            "Optimizes with clear questions, context, and specific intent.",
            "optimize_for_ai_search",
        ),
    ]
# Public API of this module: the optimizer class, its singleton accessor,
# the two Gradio tool functions, and the interface builder.
__all__ = [
    "QueryOptimizer",
    "get_optimizer",
    "Optimize_for_Search_Engine",
    "Optimize_for_AI_Search",
    "build_interfaces",
]