"""Wikipedia and Wikidata API client for TriviaVerse."""

import urllib.parse
from typing import Any, Dict, List, Optional

import requests


class WikiAPI:
    """Wikipedia and Wikidata API client."""

    # Wikipedia domains for the languages the app supports.
    LANG_DOMAINS = {
        'en': 'en.wikipedia.org',
        'hi': 'hi.wikipedia.org',
        'te': 'te.wikipedia.org',
        'ta': 'ta.wikipedia.org',
        'kn': 'kn.wikipedia.org',
        'bn': 'bn.wikipedia.org',
    }

    def __init__(self):
        self.wikipedia_base_url = "https://en.wikipedia.org/api/rest_v1"
        self.wikidata_base_url = "https://www.wikidata.org/w/api.php"
        self.session = requests.Session()
        self.session.headers.update({
            'User-Agent': 'TriviaVerse/1.0 (https://github.com/your-repo/triviaverse)'
        })

    def fetch_content(self, topic: str, language: str = 'en') -> Optional[str]:
        """Fetch article text for a topic from Wikipedia."""
        try:
            # First, search for the article.
            search_results = self._search_wikipedia(topic, language)
            if not search_results:
                return None
            # Take the first (top-ranked) result.
            page_title = search_results[0]['title']
            # Fetch the full content for that page.
            return self._get_wikipedia_content(page_title, language)
        except Exception as e:
            print(f"Error fetching content: {e}")
            return None

    def _search_wikipedia(self, query: str, language: str = 'en') -> Optional[List[Dict[str, Any]]]:
        """Search Wikipedia for articles matching the query."""
        domain = self.LANG_DOMAINS.get(language, 'en.wikipedia.org')
        # MediaWiki REST search endpoint (the RESTBase /api/rest_v1 API has
        # no /page/search route); the response body is {"pages": [...]}.
        url = f"https://{domain}/w/rest.php/v1/search/page"
        try:
            response = self.session.get(url, params={'q': query, 'limit': 10}, timeout=10)
            response.raise_for_status()
            data = response.json()
            return data.get('pages', [])
        except Exception as e:
            print(f"Search error: {e}")
            return None
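
    # For reference (assumed from the MediaWiki REST search response): each
    # item in "pages" looks roughly like {'id': ..., 'key': 'Earth',
    # 'title': 'Earth', 'description': ..., 'excerpt': ...}; this class only
    # consumes 'title', 'key', and 'description'.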

    def _get_wikipedia_content(self, page_title: str, language: str = 'en') -> Optional[str]:
        """Get the text content of a Wikipedia page."""
        domain = self.LANG_DOMAINS.get(language, 'en.wikipedia.org')
        # Get the page summary first (the title must be URL-encoded).
        quoted_title = urllib.parse.quote(page_title, safe='')
        summary_url = f"https://{domain}/api/rest_v1/page/summary/{quoted_title}"
        try:
            response = self.session.get(summary_url, timeout=10)
            response.raise_for_status()
            summary_data = response.json()
            extract = summary_data.get('extract', '')

            # Try to get a longer plain-text extract via the action API.
            content_url = f"https://{domain}/w/api.php"
            params = {
                'action': 'query',
                'format': 'json',
                'titles': page_title,
                'prop': 'extracts',
                'exintro': 1,
                'explaintext': 1,
                'exsectionformat': 'plain',
            }
            response = self.session.get(content_url, params=params, timeout=10)
            response.raise_for_status()
            data = response.json()
            pages = data.get('query', {}).get('pages', {})
            for page_data in pages.values():
                if 'extract' in page_data:
                    full_extract = page_data['extract']
                    # Return whichever extract is longer.
                    return full_extract if len(full_extract) > len(extract) else extract
            return extract
        except Exception as e:
            print(f"Content fetch error: {e}")
            return None

    def get_wikidata_info(self, topic: str) -> Optional[Dict[str, Any]]:
        """Get structured data about a topic from Wikidata."""
        try:
            # Search for a matching Wikidata entity.
            params = {
                'action': 'wbsearchentities',
                'search': topic,
                'language': 'en',
                'format': 'json',
            }
            response = self.session.get(self.wikidata_base_url, params=params, timeout=10)
            response.raise_for_status()
            entities = response.json().get('search', [])
            if not entities:
                return None
            entity_id = entities[0]['id']

            # Fetch the entity's full data.
            params = {
                'action': 'wbgetentities',
                'ids': entity_id,
                'format': 'json',
                'languages': 'en',
            }
            response = self.session.get(self.wikidata_base_url, params=params, timeout=10)
            response.raise_for_status()
            entity_data = response.json().get('entities', {}).get(entity_id, {})
            return self._process_wikidata_entity(entity_data)
        except Exception as e:
            print(f"Wikidata error: {e}")
            return None

    def _process_wikidata_entity(self, entity_data: Dict[str, Any]) -> Dict[str, Any]:
        """Extract the useful fields from a Wikidata entity."""
        processed_data = {
            'label': '',
            'description': '',
            'claims': {},
            'aliases': [],
        }
        # English label.
        labels = entity_data.get('labels', {})
        if 'en' in labels:
            processed_data['label'] = labels['en']['value']
        # English description.
        descriptions = entity_data.get('descriptions', {})
        if 'en' in descriptions:
            processed_data['description'] = descriptions['en']['value']
        # English aliases.
        aliases = entity_data.get('aliases', {})
        if 'en' in aliases:
            processed_data['aliases'] = [alias['value'] for alias in aliases['en']]
        # Keep only a few high-value claims.
        claims = entity_data.get('claims', {})
        important_properties = [
            'P31',   # instance of
            'P279',  # subclass of
            'P17',   # country
            'P569',  # date of birth
            'P570',  # date of death
            'P571',  # inception
            'P576',  # dissolved
        ]
        for prop in important_properties:
            if prop in claims:
                processed_data['claims'][prop] = claims[prop]
        return processed_data
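
    # Illustrative only (not a fetched response): a processed entity comes
    # back shaped like {'label': 'Taj Mahal', 'description': 'mausoleum in
    # Agra, India', 'claims': {'P31': [...], 'P571': [...]}, 'aliases': [...]}.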

    def get_related_topics(self, topic: str, limit: int = 5) -> List[Dict[str, str]]:
        """Get related topics for additional content."""
        try:
            # Simplified implementation: reuse the search results. A fuller
            # version could mine Wikipedia's "See also" sections or Wikidata
            # relationships.
            search_results = self._search_wikipedia(topic)
            if not search_results:
                return []
            related = []
            # Skip the first result, which is usually the exact match.
            for result in search_results[1:limit + 1]:
                page_key = result.get('key', result['title'].replace(' ', '_'))
                related.append({
                    'title': result['title'],
                    'description': result.get('description') or '',
                    'url': f"https://en.wikipedia.org/wiki/{page_key}",
                })
            return related
        except Exception as e:
            print(f"Related topics error: {e}")
            return []

    def get_random_article(self, language: str = 'en') -> Optional[Dict[str, str]]:
        """Get a random Wikipedia article summary."""
        domain = self.LANG_DOMAINS.get(language, 'en.wikipedia.org')
        try:
            url = f"https://{domain}/api/rest_v1/page/random/summary"
            response = self.session.get(url, timeout=10)
            response.raise_for_status()
            data = response.json()
            return {
                'title': data.get('title', ''),
                'extract': data.get('extract', ''),
                'url': data.get('content_urls', {}).get('desktop', {}).get('page', ''),
            }
        except Exception as e:
            print(f"Random article error: {e}")
            return None
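

if __name__ == "__main__":
    # Minimal usage sketch. Assumes network access and that the endpoints
    # above are reachable; "Taj Mahal" is just an illustrative topic, and the
    # printed shapes depend on whatever the live APIs return.
    api = WikiAPI()

    content = api.fetch_content("Taj Mahal")
    print(f"Content: {content[:200] if content else None}")

    info = api.get_wikidata_info("Taj Mahal")
    if info:
        print(f"Wikidata: {info['label']} - {info['description']}")

    for related in api.get_related_topics("Taj Mahal", limit=3):
        print(f"Related: {related['title']} ({related['url']})")

    random_article = api.get_random_article(language='en')
    if random_article:
        print(f"Random: {random_article['title']}")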