# TriviaVerse — utils/wiki_api.py
import json
import time
from typing import Any, Dict, Optional
from urllib.parse import quote

import requests
class WikiAPI:
    """Wikipedia and Wikidata API client.

    Wraps the Wikimedia REST API (summaries, search, random articles),
    the MediaWiki Action API (long extracts), and the Wikidata API
    (structured entity data). All network failures are logged and
    degrade to ``None``/``[]`` rather than raising.
    """

    # Supported language editions; any other code falls back to English.
    # Single source of truth — previously duplicated in three methods.
    LANG_DOMAINS = {
        'en': 'en.wikipedia.org',
        'hi': 'hi.wikipedia.org',
        'te': 'te.wikipedia.org',
        'ta': 'ta.wikipedia.org',
        'kn': 'kn.wikipedia.org',
        'bn': 'bn.wikipedia.org',
    }

    def __init__(self):
        self.wikipedia_base_url = "https://en.wikipedia.org/api/rest_v1"
        self.wikidata_base_url = "https://www.wikidata.org/w/api.php"
        self.session = requests.Session()
        # Wikimedia APIs ask clients to identify themselves via User-Agent.
        self.session.headers.update({
            'User-Agent': 'TriviaVerse/1.0 (https://github.com/your-repo/triviaverse)'
        })

    def _domain(self, language: str) -> str:
        """Return the Wikipedia domain for *language*, defaulting to English."""
        return self.LANG_DOMAINS.get(language, 'en.wikipedia.org')

    def fetch_content(self, topic: str, language: str = 'en') -> Optional[str]:
        """Fetch article text for *topic* from Wikipedia.

        Searches the requested language edition, then returns the plain-text
        extract of the top hit. Returns ``None`` when nothing matches or a
        request fails.
        """
        try:
            search_results = self._search_wikipedia(topic, language)
            if not search_results:
                return None
            # Take the best-ranked search hit.
            page_title = search_results[0]['title']
            return self._get_wikipedia_content(page_title, language)
        except Exception as e:
            # Best-effort: failures degrade to "no content" for the caller.
            print(f"Error fetching content: {e}")
            return None

    def _search_wikipedia(self, query: str, language: str = 'en') -> Optional[list]:
        """Search Wikipedia; return the raw ``pages`` list, or ``None`` on error."""
        domain = self._domain(language)
        # Percent-encode the query: spaces, slashes or '?' in a raw topic
        # string would otherwise corrupt the REST URL path.
        url = f"https://{domain}/api/rest_v1/page/search/{quote(query, safe='')}"
        try:
            response = self.session.get(url, timeout=10)
            response.raise_for_status()
            data = response.json()
            return data.get('pages', [])
        except Exception as e:
            print(f"Search error: {e}")
            return None

    def _get_wikipedia_content(self, page_title: str, language: str = 'en') -> Optional[str]:
        """Return the longest available plain-text extract for *page_title*.

        Tries the REST summary first, then the Action API intro extract,
        and returns whichever text is longer. ``None`` on request failure.
        """
        domain = self._domain(language)
        # Percent-encode the title for the REST path segment.
        summary_url = f"https://{domain}/api/rest_v1/page/summary/{quote(page_title, safe='')}"
        try:
            response = self.session.get(summary_url, timeout=10)
            response.raise_for_status()
            extract = response.json().get('extract', '')

            # The Action API intro extract is often longer than the REST
            # summary; fetch it and keep the better of the two.
            params = {
                'action': 'query',
                'format': 'json',
                'titles': page_title,
                'prop': 'extracts',
                'exintro': True,
                'explaintext': True,
                'exsectionformat': 'plain',
            }
            response = self.session.get(f"https://{domain}/w/api.php",
                                        params=params, timeout=10)
            response.raise_for_status()
            pages = response.json().get('query', {}).get('pages', {})
            for page_data in pages.values():
                if 'extract' in page_data:
                    full_extract = page_data['extract']
                    # Prefer whichever source produced more text.
                    return full_extract if len(full_extract) > len(extract) else extract
            return extract
        except Exception as e:
            print(f"Content fetch error: {e}")
            return None

    def get_wikidata_info(self, topic: str) -> Optional[Dict[str, Any]]:
        """Look up *topic* on Wikidata and return processed entity data.

        Returns ``None`` when no entity matches or a request fails.
        """
        try:
            # Step 1: resolve the topic to a Wikidata entity id (e.g. Q42).
            params = {
                'action': 'wbsearchentities',
                'search': topic,
                'language': 'en',
                'format': 'json',
            }
            response = self.session.get(self.wikidata_base_url, params=params, timeout=10)
            response.raise_for_status()
            entities = response.json().get('search', [])
            if not entities:
                return None
            entity_id = entities[0]['id']

            # Step 2: fetch the full entity record for that id.
            params = {
                'action': 'wbgetentities',
                'ids': entity_id,
                'format': 'json',
                'languages': 'en',
            }
            response = self.session.get(self.wikidata_base_url, params=params, timeout=10)
            response.raise_for_status()
            entity_data = response.json().get('entities', {}).get(entity_id, {})
            return self._process_wikidata_entity(entity_data)
        except Exception as e:
            print(f"Wikidata error: {e}")
            return None

    def _process_wikidata_entity(self, entity_data: Dict[str, Any]) -> Dict[str, Any]:
        """Flatten a raw Wikidata entity into label/description/aliases/claims."""
        processed_data: Dict[str, Any] = {
            'label': '',
            'description': '',
            'claims': {},
            'aliases': [],
        }
        # English label, if present.
        labels = entity_data.get('labels', {})
        if 'en' in labels:
            processed_data['label'] = labels['en']['value']
        # English description, if present.
        descriptions = entity_data.get('descriptions', {})
        if 'en' in descriptions:
            processed_data['description'] = descriptions['en']['value']
        # English aliases, flattened to plain strings.
        aliases = entity_data.get('aliases', {})
        if 'en' in aliases:
            processed_data['aliases'] = [alias['value'] for alias in aliases['en']]
        # Keep only a whitelist of properties useful for trivia generation.
        claims = entity_data.get('claims', {})
        important_properties = [
            'P31',   # instance of
            'P279',  # subclass of
            'P17',   # country
            'P569',  # date of birth
            'P570',  # date of death
            'P571',  # inception
            'P576',  # dissolved
        ]
        for prop in important_properties:
            if prop in claims:
                processed_data['claims'][prop] = claims[prop]
        return processed_data

    def get_related_topics(self, topic: str, limit: int = 5) -> list:
        """Return up to *limit* topics related to *topic*.

        Simplified implementation: reuses search results, skipping the
        first hit (assumed to be the exact match). Returns ``[]`` on error.
        """
        try:
            search_results = self._search_wikipedia(topic)
            if not search_results:
                return []
            # Skip first result (exact match); take the next `limit` entries.
            return [
                {
                    'title': result['title'],
                    'description': result.get('description', ''),
                    'url': result.get('content_urls', {}).get('desktop', {}).get('page', ''),
                }
                for result in search_results[1:limit + 1]
            ]
        except Exception as e:
            print(f"Related topics error: {e}")
            return []

    def get_random_article(self, language: str = 'en') -> Optional[Dict[str, str]]:
        """Fetch a random article summary; ``None`` on request failure."""
        domain = self._domain(language)
        try:
            url = f"https://{domain}/api/rest_v1/page/random/summary"
            response = self.session.get(url, timeout=10)
            response.raise_for_status()
            data = response.json()
            return {
                'title': data.get('title', ''),
                'extract': data.get('extract', ''),
                'url': data.get('content_urls', {}).get('desktop', {}).get('page', ''),
            }
        except Exception as e:
            print(f"Random article error: {e}")
            return None