import requests
from typing import Optional, Dict, Any
from urllib.parse import quote

# Supported language codes mapped to their Wikipedia domains.
LANG_DOMAINS = {
    'en': 'en.wikipedia.org',
    'hi': 'hi.wikipedia.org',
    'te': 'te.wikipedia.org',
    'ta': 'ta.wikipedia.org',
    'kn': 'kn.wikipedia.org',
    'bn': 'bn.wikipedia.org',
}


class WikiAPI:
    """Wikipedia and Wikidata API client."""

    def __init__(self):
        self.wikidata_base_url = "https://www.wikidata.org/w/api.php"
        self.session = requests.Session()
        self.session.headers.update({
            'User-Agent': 'TriviaVerse/1.0 (https://github.com/your-repo/triviaverse)'
        })

    def fetch_content(self, topic: str, language: str = 'en') -> Optional[str]:
        """Fetch plain-text content for a topic from Wikipedia."""
        try:
            # First, search for the article.
            search_results = self._search_wikipedia(topic, language)
            if not search_results:
                return None

            # Take the top-ranked result and fetch its full content.
            page_title = search_results[0]['title']
            return self._get_wikipedia_content(page_title, language)
        except Exception as e:
            print(f"Error fetching content: {e}")
            return None

    def _search_wikipedia(self, query: str, language: str = 'en') -> Optional[list]:
        """Search Wikipedia for articles matching the query."""
        domain = LANG_DOMAINS.get(language, 'en.wikipedia.org')
        # MediaWiki REST search endpoint (rest_v1 has no search route);
        # responds with {"pages": [...]}.
        url = f"https://{domain}/w/rest.php/v1/search/page"
        try:
            response = self.session.get(url, params={'q': query, 'limit': 10}, timeout=10)
            response.raise_for_status()
            data = response.json()
            return data.get('pages', [])
        except Exception as e:
            print(f"Search error: {e}")
            return None

    def _get_wikipedia_content(self, page_title: str, language: str = 'en') -> Optional[str]:
        """Get the full introductory content of a Wikipedia page."""
        domain = LANG_DOMAINS.get(language, 'en.wikipedia.org')

        # Get the page summary first; the title must be URL-encoded.
        summary_url = f"https://{domain}/api/rest_v1/page/summary/{quote(page_title, safe='')}"
        try:
            response = self.session.get(summary_url, timeout=10)
            response.raise_for_status()
            summary_data = response.json()
            extract = summary_data.get('extract', '')

            # Try the action API for a (usually longer) plain-text intro.
            content_url = f"https://{domain}/w/api.php"
            params = {
                'action': 'query',
                'format': 'json',
                'titles': page_title,
                'prop': 'extracts',
                'exintro': 1,
                'explaintext': 1,
                'exsectionformat': 'plain',
            }
            response = self.session.get(content_url, params=params, timeout=10)
            response.raise_for_status()
            data = response.json()

            pages = data.get('query', {}).get('pages', {})
            for page_data in pages.values():
                if 'extract' in page_data:
                    full_extract = page_data['extract']
                    # Return whichever extract is longer.
                    return full_extract if len(full_extract) > len(extract) else extract

            return extract
        except Exception as e:
            print(f"Content fetch error: {e}")
            return None

    def get_wikidata_info(self, topic: str) -> Optional[Dict[str, Any]]:
        """Get structured data from Wikidata."""
        try:
            # Search for a matching Wikidata entity.
            params = {
                'action': 'wbsearchentities',
                'search': topic,
                'language': 'en',
                'format': 'json',
            }
            response = self.session.get(self.wikidata_base_url, params=params, timeout=10)
            response.raise_for_status()
            data = response.json()

            entities = data.get('search', [])
            if not entities:
                return None
            entity_id = entities[0]['id']

            # Fetch the entity's full data.
            params = {
                'action': 'wbgetentities',
                'ids': entity_id,
                'format': 'json',
                'languages': 'en',
            }
            response = self.session.get(self.wikidata_base_url, params=params, timeout=10)
            response.raise_for_status()
            data = response.json()

            entity_data = data.get('entities', {}).get(entity_id, {})
            return self._process_wikidata_entity(entity_data)
        except Exception as e:
            print(f"Wikidata error: {e}")
            return None

    def _process_wikidata_entity(self, entity_data: Dict[str, Any]) -> Dict[str, Any]:
        """Process a Wikidata entity to extract useful information."""
        processed_data = {
            'label': '',
            'description': '',
            'claims': {},
            'aliases': [],
        }

        # Extract the English label.
        labels = entity_data.get('labels', {})
        if 'en' in labels:
            processed_data['label'] = labels['en']['value']

        # Extract the English description.
        descriptions = entity_data.get('descriptions', {})
        if 'en' in descriptions:
            processed_data['description'] = descriptions['en']['value']

        # Extract English aliases.
        aliases = entity_data.get('aliases', {})
        if 'en' in aliases:
            processed_data['aliases'] = [alias['value'] for alias in aliases['en']]

        # Extract a few important claims.
        claims = entity_data.get('claims', {})
        important_properties = [
            'P31',   # instance of
            'P279',  # subclass of
            'P17',   # country
            'P569',  # date of birth
            'P570',  # date of death
            'P571',  # inception
            'P576',  # dissolved
        ]
        for prop in important_properties:
            if prop in claims:
                processed_data['claims'][prop] = claims[prop]

        return processed_data

    def get_related_topics(self, topic: str, limit: int = 5) -> list:
        """Get related topics for additional content."""
        try:
            # This is a simplified implementation. In a real app, you might
            # use Wikipedia's "See also" sections or Wikidata relationships.
            search_results = self._search_wikipedia(topic)
            if not search_results:
                return []

            # Return related pages from the search results, skipping the
            # first result (the closest match to the topic itself). Search
            # results carry no content_urls field, so the page URL is built
            # from the result's URL-safe key.
            related = []
            for result in search_results[1:limit + 1]:
                page_key = result.get('key', result['title'].replace(' ', '_'))
                related.append({
                    'title': result['title'],
                    'description': result.get('description') or '',
                    'url': f"https://en.wikipedia.org/wiki/{page_key}",
                })
            return related
        except Exception as e:
            print(f"Related topics error: {e}")
            return []

    def get_random_article(self, language: str = 'en') -> Optional[Dict[str, str]]:
        """Get a random Wikipedia article summary."""
        domain = LANG_DOMAINS.get(language, 'en.wikipedia.org')
        try:
            url = f"https://{domain}/api/rest_v1/page/random/summary"
            response = self.session.get(url, timeout=10)
            response.raise_for_status()
            data = response.json()
            return {
                'title': data.get('title', ''),
                'extract': data.get('extract', ''),
                'url': data.get('content_urls', {}).get('desktop', {}).get('page', ''),
            }
        except Exception as e:
            print(f"Random article error: {e}")
            return None
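

# --- Usage sketch (illustrative, not part of the original module) ---
# A minimal example of how the client above might be exercised. It hits the
# live Wikipedia/Wikidata endpoints, so it needs network access; the topic
# string is an arbitrary placeholder.
if __name__ == "__main__":
    api = WikiAPI()

    # Plain-text article content (None if the search or fetch fails).
    content = api.fetch_content("Taj Mahal")
    if content:
        print(content[:200])

    # Structured Wikidata record: label, description, aliases, and a few
    # claims keyed by property ID (e.g. P31 = instance of).
    info = api.get_wikidata_info("Taj Mahal")
    if info:
        print(info['label'], '-', info['description'])

    # Up to three related articles drawn from the same search results.
    for item in api.get_related_topics("Taj Mahal", limit=3):
        print(item['title'], item['url'])

    # A random article summary, here from the Hindi Wikipedia.
    random_article = api.get_random_article(language='hi')
    if random_article:
        print(random_article['title'], random_article['url'])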