| """ | |
| WikiFit - Wikimedia API Integration Module | |
| This module provides functions to interact with various Wikimedia APIs | |
| to retrieve health and fitness information. | |
| """ | |
| import requests | |
| import logging | |
| # Setup logging | |
| logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') | |
| # Cache durations (in seconds) | |
| CACHE_TTL = 3600 # 1 hour | |
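
# A minimal sketch of how CACHE_TTL could back a simple in-memory cache for
# the API helpers below. The _ttl_cache decorator is a hypothetical addition,
# not part of the original module; it is shown only to illustrate the intended
# use of the constant, e.g. cached = _ttl_cache()(get_wikipedia_summary).
import time
import functools

def _ttl_cache(ttl=CACHE_TTL):
    """Cache a single-argument function's results for `ttl` seconds."""
    def decorator(func):
        store = {}  # maps argument -> (expiry_timestamp, cached_value)

        @functools.wraps(func)
        def wrapper(arg):
            now = time.time()
            hit = store.get(arg)
            if hit is not None and hit[0] > now:
                return hit[1]  # still fresh; serve the cached value
            value = func(arg)
            store[arg] = (now + ttl, value)
            return value
        return wrapper
    return decorator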
def get_wikipedia_summary(term):
    """
    Get a summary of a topic from Wikipedia.

    Args:
        term: The search term/topic

    Returns:
        str: Summary text or error message
    """
    try:
        # URL-encode the term so characters like "/" or "?" don't break the path
        url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{quote(term)}"
        response = requests.get(url, timeout=10)
        if response.status_code == 200:
            data = response.json()
            extract = data.get("extract", "")
            if not extract:
                # Check if we have an alternative like disambiguation
                if data.get("type") == "disambiguation":
                    return f"'{term}' refers to multiple topics. Please try a more specific search term."
                return "No summary found. This topic might not have an article on Wikipedia yet."
            return extract
        elif response.status_code == 404:
            return f"The topic '{term}' was not found on Wikipedia. Please check the spelling or try another term."
        else:
            logging.error(f"Wikipedia API error: {response.status_code} for term '{term}'")
            return f"Error retrieving information: HTTP {response.status_code}"
    except requests.RequestException as e:
        logging.error(f"Wikipedia request error for '{term}': {str(e)}")
        return "Connection error. Please check your internet connection and try again later."
def get_wiktionary_definition(term):
    """Get word definitions from Wiktionary"""
    try:
        url = "https://en.wiktionary.org/w/api.php"
        params = {
            "action": "query",
            "format": "json",
            "titles": term,
            "prop": "extracts",
            "exsectionformat": "plain",
            "exsentences": 5,
            "explaintext": True
        }
        response = requests.get(url, params=params, timeout=10)
        if response.status_code == 200:
            data = response.json()
            pages = data.get("query", {}).get("pages", {})
            # Extract the first page content (there should only be one)
            for page_id in pages:
                if "extract" in pages[page_id]:
                    return pages[page_id]["extract"]
            return "No definition found."
        else:
            return f"Error retrieving definition: HTTP {response.status_code}"
    except requests.RequestException as e:
        logging.error(f"Wiktionary request error: {str(e)}")
        return "Connection error. Please try again later."
def get_wikiquote_quotes(term):
    """Get quotes related to a topic from Wikiquote"""
    try:
        url = "https://en.wikiquote.org/w/api.php"
        params = {
            "action": "query",
            "format": "json",
            "titles": term,
            "prop": "extracts",
            "exsentences": 5,
            "explaintext": True
        }
        response = requests.get(url, params=params, timeout=10)
        if response.status_code == 200:
            data = response.json()
            pages = data.get("query", {}).get("pages", {})
            # Extract the first page content (there should only be one)
            for page_id in pages:
                if int(page_id) > 0 and "extract" in pages[page_id]:  # Skip missing pages
                    content = pages[page_id]["extract"].strip()
                    if content:
                        return content
            return "No quotes found for this topic."
        else:
            return f"Error retrieving quotes: HTTP {response.status_code}"
    except requests.RequestException as e:
        logging.error(f"Wikiquote request error: {str(e)}")
        return "Connection error. Please try again later."
def get_wikibooks_content(term):
    """Get educational content from Wikibooks"""
    try:
        url = "https://en.wikibooks.org/w/api.php"
        params = {
            "action": "query",
            "format": "json",
            "titles": term,
            "prop": "extracts",
            "exsentences": 10,
            "explaintext": True
        }
        response = requests.get(url, params=params, timeout=10)
        if response.status_code == 200:
            data = response.json()
            pages = data.get("query", {}).get("pages", {})
            # Extract the first page content
            for page_id in pages:
                if int(page_id) > 0 and "extract" in pages[page_id]:
                    return pages[page_id]["extract"]
            return "No Wikibooks content found for this topic."
        else:
            return f"Error retrieving content: HTTP {response.status_code}"
    except requests.RequestException as e:
        logging.error(f"Wikibooks request error: {str(e)}")
        return "Connection error. Please try again later."
def get_wikimedia_commons_images(term, limit=5):
    """Get relevant images from Wikimedia Commons"""
    try:
        url = "https://commons.wikimedia.org/w/api.php"
        params = {
            "action": "query",
            "format": "json",
            "list": "search",
            # Note: a haswbstatement:P180= ("depicts") filter expects a Wikidata
            # item ID (e.g. Q9103), not free text, so a plain text search is used
            "srsearch": term,
            "srnamespace": 6,  # File namespace
            "srlimit": limit
        }
        response = requests.get(url, params=params, timeout=10)
        if response.status_code == 200:
            data = response.json()
            search_results = data.get("query", {}).get("search", [])
            image_titles = []
            for result in search_results:
                if "title" in result:
                    image_titles.append(result["title"])
            # If we found images, get their URLs
            image_data = []
            if image_titles:
                file_titles = "|".join(image_titles)
                image_params = {
                    "action": "query",
                    "format": "json",
                    "titles": file_titles,
                    "prop": "imageinfo",
                    "iiprop": "url|extmetadata",
                    "iiurlwidth": 300  # Thumbnail width
                }
                img_response = requests.get(url, params=image_params, timeout=10)
                if img_response.status_code == 200:
                    img_data = img_response.json()
                    pages = img_data.get("query", {}).get("pages", {})
                    for page_id in pages:
                        page = pages[page_id]
                        if "imageinfo" in page and page["imageinfo"]:
                            info = page["imageinfo"][0]
                            title = page.get("title", "").replace("File:", "")
                            thumb_url = info.get("thumburl", "")
                            description = info.get("extmetadata", {}).get("ImageDescription", {}).get("value", "")
                            # Strip basic HTML from the description
                            description = description.replace("<p>", "").replace("</p>", "")
                            if thumb_url:
                                image_data.append({
                                    "title": title,
                                    "url": thumb_url,
                                    "description": description
                                })
            return image_data
        else:
            logging.error(f"Wikimedia Commons API error: {response.status_code} for term '{term}'")
            return []
    except requests.RequestException as e:
        logging.error(f"Wikimedia Commons request error: {str(e)}")
        return []
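
# The <p>-tag stripping above is intentionally minimal. A more general
# approach, sketched here as a hypothetical helper (not part of the original
# module), removes any HTML tag from Commons metadata strings:
import re

def _strip_html(text):
    """Remove all HTML tags from a metadata string."""
    return re.sub(r"<[^>]+>", "", text).strip()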
def get_wikisource_texts(term):
    """Get health-related texts from Wikisource"""
    try:
        url = "https://en.wikisource.org/w/api.php"
        params = {
            "action": "query",
            "format": "json",
            "list": "search",
            "srsearch": term,
            "srlimit": 3
        }
        response = requests.get(url, params=params, timeout=10)
        if response.status_code == 200:
            data = response.json()
            search_results = data.get("query", {}).get("search", [])
            text_data = []
            for result in search_results:
                title = result.get("title", "")
                snippet = result.get("snippet", "").replace("<span class=\"searchmatch\">", "").replace("</span>", "")
                text_data.append({
                    "title": title,
                    "snippet": snippet
                })
            return text_data
        else:
            logging.error(f"Wikisource API error: {response.status_code} for term '{term}'")
            return []
    except requests.RequestException as e:
        logging.error(f"Wikisource request error: {str(e)}")
        return []
def get_wikiversity_resources(term):
    """Get educational resources from Wikiversity"""
    try:
        url = "https://en.wikiversity.org/w/api.php"
        params = {
            "action": "query",
            "format": "json",
            "titles": term,
            "prop": "extracts",
            "exsentences": 5,
            "explaintext": True
        }
        response = requests.get(url, params=params, timeout=10)
        if response.status_code == 200:
            data = response.json()
            pages = data.get("query", {}).get("pages", {})
            # Extract the first page content
            for page_id in pages:
                if int(page_id) > 0 and "extract" in pages[page_id]:
                    return pages[page_id]["extract"]
            return "No Wikiversity resources found for this topic."
        else:
            return f"Error retrieving resources: HTTP {response.status_code}"
    except requests.RequestException as e:
        logging.error(f"Wikiversity request error: {str(e)}")
        return "Connection error. Please try again later."
def get_wikispecies_info(species_name):
    """Get species information from Wikispecies"""
    try:
        url = "https://species.wikimedia.org/w/api.php"
        params = {
            "action": "query",
            "format": "json",
            "titles": species_name,
            "prop": "extracts",
            "explaintext": True
        }
        response = requests.get(url, params=params, timeout=10)
        if response.status_code == 200:
            data = response.json()
            pages = data.get("query", {}).get("pages", {})
            # Extract the first page content
            for page_id in pages:
                if int(page_id) > 0 and "extract" in pages[page_id]:
                    return pages[page_id]["extract"]
            return "No species information found."
        else:
            return f"Error retrieving species information: HTTP {response.status_code}"
    except requests.RequestException as e:
        logging.error(f"Wikispecies request error: {str(e)}")
        return "Connection error. Please try again later."
def get_wikidata_health_info(term):
    """Get structured health data from Wikidata"""
    try:
        # First, find the Wikidata ID for the term
        url = "https://www.wikidata.org/w/api.php"
        params = {
            "action": "wbsearchentities",
            "format": "json",
            "search": term,
            "language": "en"
        }
        response = requests.get(url, params=params, timeout=10)
        if response.status_code == 200:
            data = response.json()
            search_results = data.get("search", [])
            if not search_results:
                return "No Wikidata information found for this term."
            # Get the first result's ID
            entity_id = search_results[0].get("id")
            # Now get the entity data
            entity_params = {
                "action": "wbgetentities",
                "format": "json",
                "ids": entity_id,
                "languages": "en"
            }
            entity_response = requests.get(url, params=entity_params, timeout=10)
            if entity_response.status_code == 200:
                entity_data = entity_response.json()
                entities = entity_data.get("entities", {})
                if entity_id in entities:
                    entity = entities[entity_id]
                    # Extract label and description
                    label = entity.get("labels", {}).get("en", {}).get("value", "No label")
                    description = entity.get("descriptions", {}).get("en", {}).get("value", "No description")
                    # Extract some claims/properties
                    claims = entity.get("claims", {})
                    properties = {}
                    # Common health-related properties
                    property_map = {
                        "P2175": "medical condition treated",
                        "P2176": "drug used for treatment",
                        "P780": "symptoms",
                        "P1050": "medical condition",
                        "P1995": "health specialty"
                    }
                    for prop_id, prop_name in property_map.items():
                        if prop_id in claims:
                            values = []
                            for claim in claims[prop_id]:
                                mainsnak = claim.get("mainsnak", {})
                                if mainsnak.get("datatype") == "wikibase-item" and "datavalue" in mainsnak:
                                    # These are raw item IDs (Q-numbers), not
                                    # labels; see the label-resolution sketch below
                                    value_id = mainsnak["datavalue"]["value"]["id"]
                                    values.append(value_id)
                            if values:
                                properties[prop_name] = values
                    return {
                        "label": label,
                        "description": description,
                        "properties": properties
                    }
            return "No detailed Wikidata information available."
        else:
            logging.error(f"Wikidata API error: {response.status_code} for term '{term}'")
            return f"Error retrieving Wikidata: HTTP {response.status_code}"
    except requests.RequestException as e:
        logging.error(f"Wikidata request error: {str(e)}")
        return "Connection error. Please try again later."
# Unified search function to search across all Wikimedia platforms
def search_all_wikimedia(term):
    """
    Search for a term across all Wikimedia platforms.

    Args:
        term: Search term

    Returns:
        dict: Results from all Wikimedia sources
    """
    # Normalize the term (the Wikipedia REST API expects underscores for spaces)
    search_term = term.strip().replace(" ", "_")
    # Create a results dictionary
    results = {
        "wikipedia": None,
        "wiktionary": None,
        "wikiquote": None,
        "wikibooks": None,
        "commons": None,
        "wikisource": None,
        "wikiversity": None,
        "wikispecies": None,
        "wikidata": None
    }
    # Get results from each platform
    results["wikipedia"] = get_wikipedia_summary(search_term)
    results["wiktionary"] = get_wiktionary_definition(search_term)
    results["wikiquote"] = get_wikiquote_quotes(search_term)
    results["wikibooks"] = get_wikibooks_content(search_term)
    results["commons"] = get_wikimedia_commons_images(search_term)
    results["wikisource"] = get_wikisource_texts(search_term)
    results["wikiversity"] = get_wikiversity_resources(search_term)
    results["wikispecies"] = get_wikispecies_info(search_term)
    results["wikidata"] = get_wikidata_health_info(search_term)
    return results
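
# A minimal usage sketch (not part of the original module): run this file
# directly to query every platform for a sample topic. "Aerobic exercise"
# is an arbitrary example term.
if __name__ == "__main__":
    results = search_all_wikimedia("Aerobic exercise")
    for source, content in results.items():
        print(f"--- {source} ---")
        print(content)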