# Trivia5 / src/modules/api_utils.py
# Uploaded by Bharath370 ("Upload 102 files", commit 582bf6b, verified)
# modules/api_utils.py
"""API utility functions for Wikimedia services"""
import requests
from typing import Dict, List, Optional
import time
import random # Import the random module
from config.settings import (
WIKIPEDIA_API,
WIKIDATA_API,
WIKIBOOKS_API,
WIKI_REST_API,
CACHE_TIMEOUT,
)
# Cache for API responses
_cache = {}
def _get_cached_or_fetch(url: str, params: Optional[Dict] = None) -> Optional[Dict]:
    """Return a cached JSON response for (url, params) or fetch it from the API.

    Uses a simple module-level in-memory cache keyed by the URL plus the
    stringified params. Entries expire CACHE_TIMEOUT seconds after they
    were stored.

    Args:
        url: Endpoint to GET.
        params: Optional query parameters forwarded to requests.get.

    Returns:
        The decoded JSON payload, or None on a non-200 status or request error.
    """
    cache_key = f"{url}_{str(params)}"
    if cache_key in _cache:
        cached_data, timestamp = _cache[cache_key]
        if time.time() - timestamp < CACHE_TIMEOUT:
            return cached_data
    try:
        response = requests.get(url, params=params, timeout=10)
        if response.status_code == 200:
            data = response.json()
            # Fixed: store the time of the fetch, not time.time() + CACHE_TIMEOUT.
            # The expiry check above computes time.time() - timestamp, so storing
            # a future timestamp made entries live for twice the intended timeout.
            _cache[cache_key] = (data, time.time())
            return data
    except requests.exceptions.RequestException as e:
        print(f"API request error: {e}")
    return None
def fetch_wikipedia_summary(topic: str) -> Optional[Dict]:
    """Fetch the Wikipedia REST summary for *topic*, using the shared cache."""
    summary_url = f"{WIKI_REST_API}{topic}"
    return _get_cached_or_fetch(summary_url)
def search_wikipedia(query: str, limit: int = 5) -> List[str]:
    """Search Wikipedia via opensearch and return up to *limit* matching titles."""
    response = _get_cached_or_fetch(
        WIKIPEDIA_API,
        {"action": "opensearch", "search": query, "limit": limit, "format": "json"},
    )
    # opensearch responds with [query, titles, descriptions, urls];
    # the title list is the second element.
    if not response or len(response) <= 1:
        return []
    return response[1]
def fetch_wikidata_entity(entity_id: str) -> Optional[Dict]:
    """Fetch an entity record from Wikidata (English labels only)."""
    query = {
        "action": "wbgetentities",
        "format": "json",
        "ids": entity_id,
        "languages": "en",
    }
    return _get_cached_or_fetch(WIKIDATA_API, query)
def fetch_wikipedia_categories(page_title: str) -> List[str]:
    """Return up to 10 category names (without the "Category:" prefix) for a page."""
    query = {
        "action": "query",
        "prop": "categories",
        "titles": page_title,
        "format": "json",
        "cllimit": 10,
    }
    response = _get_cached_or_fetch(WIKIPEDIA_API, query)
    if response:
        pages = response.get("query", {}).get("pages", {})
        for page in pages.values():
            # Only one title is queried, so the first page holds the answer.
            cats = page.get("categories", [])
            return [cat["title"].replace("Category:", "") for cat in cats]
    return []
def fetch_related_topics(topic: str, limit: int = 5) -> List[str]:
    """Search Wikipedia for *topic* and return related page titles."""
    response = _get_cached_or_fetch(
        WIKIPEDIA_API,
        {
            "action": "query",
            "list": "search",
            "srsearch": topic,
            "srlimit": limit,
            "format": "json",
        },
    )
    if not response:
        return []
    hits = response.get("query", {}).get("search", [])
    # Exclude the topic itself so the list only contains *related* pages.
    return [hit["title"] for hit in hits if hit["title"] != topic]
def fetch_wikibooks_content(topic: str) -> Optional[str]:
    """Search Wikibooks for *topic*; return the top hit's snippet, if any."""
    query = {"action": "query", "list": "search", "srsearch": topic, "format": "json"}
    response = _get_cached_or_fetch(WIKIBOOKS_API, query)
    if not response:
        return None
    hits = response.get("query", {}).get("search", [])
    if not hits:
        return None
    return hits[0].get("snippet", "")
def fetch_wikipedia_page_details(topic: str) -> Optional[Dict]:
    """Fetch detailed Wikipedia page content and images.

    Args:
        topic: Page title to look up (redirects are resolved).

    Returns:
        A dict with keys "title", "extract" (plain-text article body),
        "images" (list of thumbnail URLs, possibly empty) and "full_url",
        or None if the page is missing or the request failed.
    """
    params = {
        "action": "query",
        "format": "json",
        "titles": topic,
        "prop": "extracts|pageimages",
        "exintro": False,  # Get full extract
        "explaintext": True,  # Get plain text
        "pithumbsize": 200,  # Thumbnail size for images
        "redirects": 1,  # Resolve redirects
    }
    data = _get_cached_or_fetch(WIKIPEDIA_API, params)
    if data and "query" in data and "pages" in data["query"]:
        page_id = next(iter(data["query"]["pages"]))  # Get the first page ID
        page_data = data["query"]["pages"][page_id]
        if "missing" in page_data:
            return None  # Page not found
        extract = page_data.get("extract", "")
        images = []
        # Fixed: with prop=pageimages the API places "thumbnail" directly on
        # the page object, not nested under a "pageimages" key, so the old
        # check ("pageimages" in page_data) never matched and images was
        # always empty.
        thumbnail = page_data.get("thumbnail")
        if thumbnail and "source" in thumbnail:
            images.append(thumbnail["source"])
        # Fall back to the requested topic if the API omits the title, so the
        # URL construction below cannot raise on None.
        title = page_data.get("title") or topic
        return {
            "title": page_data.get("title"),
            "extract": extract,
            "images": images,
            "full_url": f"https://en.wikipedia.org/wiki/{title.replace(' ', '_')}",
        }
    return None
def fetch_wikipedia_images(topic: str, limit: int = 1) -> List[str]:
    """Fetch image URLs for *topic* from Wikipedia, prioritizing relevant images.

    Strategy: first take the page's lead image (pageimages thumbnail); if more
    are needed, list every image used on the page, keep those whose file title
    mentions the topic (or any word of it), shuffle for variety, and resolve
    each to a direct URL via imageinfo.

    Args:
        topic: Page title to look up (redirects are resolved).
        limit: Maximum number of image URLs to return.

    Returns:
        Up to *limit* direct image URLs (possibly empty on failure/no match).
    """
    image_urls: List[str] = []
    # First, try to get the main image/thumbnail via pageimages.
    page_details_params = {
        "action": "query",
        "format": "json",
        "titles": topic,
        "prop": "pageimages",
        "pithumbsize": 400,  # Larger thumbnail
        "redirects": 1,
    }
    page_details_data = _get_cached_or_fetch(WIKIPEDIA_API, page_details_params)
    if page_details_data and "query" in page_details_data and "pages" in page_details_data["query"]:
        page_id = next(iter(page_details_data["query"]["pages"]))
        page_data = page_details_data["query"]["pages"][page_id]
        # Fixed: prop=pageimages returns "thumbnail" directly on the page
        # object, not under a "pageimages" key, so the original check never
        # matched and the lead-image fast path never fired.
        thumbnail = page_data.get("thumbnail")
        if thumbnail and "source" in thumbnail:
            image_urls.append(thumbnail["source"])
            if len(image_urls) >= limit:
                return image_urls
    # If not enough images, list all images on the page and filter by relevance.
    params = {
        "action": "query",
        "format": "json",
        "titles": topic,
        "prop": "images",
        "imlimit": "max",  # Fetch all images used on the page
        "redirects": 1,
    }
    data = _get_cached_or_fetch(WIKIPEDIA_API, params)
    if data and "query" in data and "pages" in data["query"]:
        page_id = next(iter(data["query"]["pages"]))
        page_data = data["query"]["pages"][page_id]
        if "images" in page_data:
            # Basic relevance heuristic: keep files whose title mentions the
            # topic or any individual word of it.
            all_page_images = [
                img_info["title"]
                for img_info in page_data["images"]
                if topic.lower() in img_info["title"].lower()
                or any(word.lower() in img_info["title"].lower() for word in topic.split())
            ]
            random.shuffle(all_page_images)  # Vary which relevant images are returned
            for image_title in all_page_images:
                if len(image_urls) >= limit:
                    break
                # Resolve the file title to a direct URL via imageinfo.
                image_params = {
                    "action": "query",
                    "format": "json",
                    "titles": image_title,
                    "prop": "imageinfo",
                    "iiprop": "url",
                }
                image_data = _get_cached_or_fetch(WIKIPEDIA_API, image_params)
                if image_data and "query" in image_data and "pages" in image_data["query"]:
                    img_page_id = next(iter(image_data["query"]["pages"]))
                    img_page_data = image_data["query"]["pages"][img_page_id]
                    if "imageinfo" in img_page_data and img_page_data["imageinfo"]:
                        image_urls.append(img_page_data["imageinfo"][0]["url"])
    return image_urls[:limit]  # Ensure limit is respected