open-webui

Build error

open-webui / backend /open_webui /retrieval /web /azure.py

github-actions[bot]

GitHub deploy: 4d024c91d61f15a8b39171610ab1406915ef598d

d6703a1 2 months ago

4.17 kB

	"""Azure AI Search integration for Open WebUI.

	Documentation: https://learn.microsoft.com/en-us/python/api/overview/azure/search-documents-readme?view=azure-python

	Required package: azure-search-documents
	Install: pip install azure-search-documents
	"""

	import logging
	from typing import Optional

	from open_webui.retrieval.web.main import SearchResult, get_filtered_results

	# Module-level logger named after this module.
	log = logging.getLogger(__name__)


	def _first_populated(document: dict, field_names: tuple[str, ...]):
	    """Return the first truthy value stored under any of ``field_names``, else None."""
	    for field_name in field_names:
	        value = document.get(field_name)
	        if value:
	            return value
	    return None


	def _as_text(value) -> Optional[str]:
	    """Render an index field value as text; ``None`` and strings pass through unchanged."""
	    if value is None or isinstance(value, str):
	        return value
	    if isinstance(value, (list, tuple)):
	        # Collection-valued fields are flattened into one space-separated string.
	        return " ".join(str(item) for item in value)
	    return str(value)


	def search_azure(
	    api_key: str,
	    endpoint: str,
	    index_name: str,
	    query: str,
	    count: int,
	    filter_list: Optional[list[str]] = None,
	) -> list[SearchResult]:
	    """Search using Azure AI Search.

	    Index schemas are user-defined, so each returned document is probed for a
	    handful of commonly used field names to find a link, a title and a
	    snippet. Documents without any link field are dropped.

	    Args:
	        api_key: Azure Search API key (query key or admin key)
	        endpoint: Azure Search service endpoint
	            (e.g., https://myservice.search.windows.net)
	        index_name: Name of the search index to query
	        query: Search query string
	        count: Number of results to request (passed as ``top``)
	        filter_list: Optional list of domains to filter results

	    Returns:
	        List of SearchResult objects with link, title, and snippet

	    Raises:
	        ImportError: If the azure-search-documents package is not installed.
	        Exception: Any error raised while querying or converting results is
	            logged and re-raised unchanged.
	    """
	    # Imported lazily so the module can be loaded without the optional SDK.
	    try:
	        from azure.core.credentials import AzureKeyCredential
	        from azure.search.documents import SearchClient
	    except ImportError as ex:
	        log.error(
	            "azure-search-documents package is not installed. "
	            "Install it with: pip install azure-search-documents"
	        )
	        raise ImportError(
	            "azure-search-documents is required for Azure AI Search. "
	            "Install it with: pip install azure-search-documents"
	        ) from ex

	    try:
	        # Create search client with API key authentication
	        credential = AzureKeyCredential(api_key)
	        search_results = []

	        # Use the client as a context manager so its session is closed even
	        # when the query or the conversion below fails. The result iterator
	        # is consumed inside the block, while the client is still open.
	        with SearchClient(
	            endpoint=endpoint, index_name=index_name, credential=credential
	        ) as search_client:
	            for document in search_client.search(search_text=query, top=count):
	                fields = dict(document)

	                # Try to find URL field (common names)
	                link = _as_text(
	                    _first_populated(
	                        fields, ("url", "link", "uri", "metadata_storage_path")
	                    )
	                )
	                if not link:
	                    # Only keep documents for which a link was found
	                    continue

	                # Try to find title field (common names)
	                title = _as_text(
	                    _first_populated(
	                        fields,
	                        (
	                            "title",
	                            "name",
	                            "metadata_title",
	                            "metadata_storage_name",
	                        ),
	                    )
	                )

	                # Try to find content/snippet field (common names). The value
	                # is coerced to text first so that a non-string field cannot
	                # break the truncation below.
	                snippet = _as_text(
	                    _first_populated(
	                        fields,
	                        ("content", "snippet", "description", "summary", "text"),
	                    )
	                )

	                # Truncate snippet if too long (500 characters including "...")
	                if snippet and len(snippet) > 500:
	                    snippet = snippet[:497] + "..."

	                search_results.append(
	                    {
	                        "link": link,
	                        "title": title,
	                        "snippet": snippet,
	                    }
	                )

	        # Apply domain filtering if specified
	        if filter_list:
	            search_results = get_filtered_results(search_results, filter_list)

	        # Convert to SearchResult objects
	        return [
	            SearchResult(
	                link=result["link"],
	                title=result.get("title"),
	                snippet=result.get("snippet"),
	            )
	            for result in search_results
	        ]

	    except Exception as ex:
	        log.error(f"Azure AI Search error: {ex}")
	        # Bare raise keeps the original traceback intact.
	        raise