File size: 9,110 Bytes
33a2aaf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
import json
import time
from typing import Optional, Dict, Any
from urllib.parse import quote

import requests

class WikiAPI:
    """Wikipedia and Wikidata API client.

    Wraps the Wikipedia REST API (search, summaries, random articles) and
    the Wikidata action API (entity search / lookup) behind a single
    ``requests.Session``. All public methods fail soft: network or parsing
    errors are printed and an empty result (None / []) is returned instead
    of raising, so callers can degrade gracefully.
    """

    # Language code -> Wikipedia domain, shared by every language-aware
    # method (this mapping was previously duplicated in three methods).
    LANG_DOMAINS = {
        'en': 'en.wikipedia.org',
        'hi': 'hi.wikipedia.org',
        'te': 'te.wikipedia.org',
        'ta': 'ta.wikipedia.org',
        'kn': 'kn.wikipedia.org',
        'bn': 'bn.wikipedia.org',
    }
    # Fallback for unsupported language codes.
    DEFAULT_DOMAIN = 'en.wikipedia.org'

    def __init__(self):
        self.wikipedia_base_url = "https://en.wikipedia.org/api/rest_v1"
        self.wikidata_base_url = "https://www.wikidata.org/w/api.php"
        self.session = requests.Session()
        # Wikimedia's API etiquette asks clients for a descriptive User-Agent.
        self.session.headers.update({
            'User-Agent': 'TriviaVerse/1.0 (https://github.com/your-repo/triviaverse)'
        })

    def _domain_for(self, language: str) -> str:
        """Return the Wikipedia domain for *language*, defaulting to English."""
        return self.LANG_DOMAINS.get(language, self.DEFAULT_DOMAIN)

    def fetch_content(self, topic: str, language: str = 'en') -> Optional[str]:
        """Search for *topic* and return the best-matching article's text.

        Returns None when nothing is found or a request fails.
        """
        try:
            search_results = self._search_wikipedia(topic, language)
            if not search_results:
                return None

            # Take the top-ranked hit and pull its full text.
            page_title = search_results[0]['title']
            return self._get_wikipedia_content(page_title, language)

        except Exception as e:
            print(f"Error fetching content: {e}")
            return None

    def _search_wikipedia(self, query: str, language: str = 'en') -> Optional[list]:
        """Search Wikipedia for *query*.

        Returns the list of result page dicts (possibly empty), or None on
        error.
        """
        domain = self._domain_for(language)

        # Percent-encode the query: it becomes a URL path segment, so raw
        # spaces, slashes, '?' or '#' would corrupt the request.
        url = f"https://{domain}/api/rest_v1/page/search/{quote(query, safe='')}"

        try:
            response = self.session.get(url, timeout=10)
            response.raise_for_status()
            return response.json().get('pages', [])

        except Exception as e:
            print(f"Search error: {e}")
            return None

    def _get_wikipedia_content(self, page_title: str, language: str = 'en') -> Optional[str]:
        """Return the longest available plain-text extract for *page_title*.

        Queries the REST summary endpoint first, then the action API's
        'extracts' prop, and returns whichever text is longer. Returns None
        on error.
        """
        domain = self._domain_for(language)

        # Titles may contain spaces/slashes; encode them as one path segment.
        summary_url = f"https://{domain}/api/rest_v1/page/summary/{quote(page_title, safe='')}"

        try:
            response = self.session.get(summary_url, timeout=10)
            response.raise_for_status()
            extract = response.json().get('extract', '')

            # The action API's intro extract is often longer than the summary.
            content_url = f"https://{domain}/w/api.php"
            params = {
                'action': 'query',
                'format': 'json',
                'titles': page_title,
                'prop': 'extracts',
                'exintro': True,
                'explaintext': True,
                'exsectionformat': 'plain'
            }

            response = self.session.get(content_url, params=params, timeout=10)
            response.raise_for_status()

            pages = response.json().get('query', {}).get('pages', {})
            for page_data in pages.values():
                if 'extract' in page_data:
                    full_extract = page_data['extract']
                    # Prefer whichever source yielded more text.
                    return full_extract if len(full_extract) > len(extract) else extract

            return extract

        except Exception as e:
            print(f"Content fetch error: {e}")
            return None

    def get_wikidata_info(self, topic: str) -> Optional[Dict[str, Any]]:
        """Look up *topic* on Wikidata and return processed entity data.

        Returns None when no entity matches or a request fails.
        """
        try:
            # Step 1: resolve the free-text topic to a Wikidata entity id.
            params = {
                'action': 'wbsearchentities',
                'search': topic,
                'language': 'en',
                'format': 'json'
            }
            response = self.session.get(self.wikidata_base_url, params=params, timeout=10)
            response.raise_for_status()

            entities = response.json().get('search', [])
            if not entities:
                return None
            entity_id = entities[0]['id']

            # Step 2: fetch the entity's full record.
            params = {
                'action': 'wbgetentities',
                'ids': entity_id,
                'format': 'json',
                'languages': 'en'
            }
            response = self.session.get(self.wikidata_base_url, params=params, timeout=10)
            response.raise_for_status()

            entity_data = response.json().get('entities', {}).get(entity_id, {})
            return self._process_wikidata_entity(entity_data)

        except Exception as e:
            print(f"Wikidata error: {e}")
            return None

    def _process_wikidata_entity(self, entity_data: Dict[str, Any]) -> Dict[str, Any]:
        """Reduce a raw Wikidata entity to its English label, description,
        aliases, and a whitelist of notable claims."""
        labels = entity_data.get('labels', {})
        descriptions = entity_data.get('descriptions', {})
        aliases = entity_data.get('aliases', {})
        claims = entity_data.get('claims', {})

        # Wikidata property ids worth keeping (see inline legend).
        important_properties = [
            'P31',   # instance of
            'P279',  # subclass of
            'P17',   # country
            'P569',  # date of birth
            'P570',  # date of death
            'P571',  # inception
            'P576',  # dissolved
        ]

        # Chained .get avoids KeyError on partial entities (e.g. an 'en'
        # label entry without a 'value'), which previously aborted the
        # whole lookup via the caller's except clause.
        return {
            'label': labels.get('en', {}).get('value', ''),
            'description': descriptions.get('en', {}).get('value', ''),
            'claims': {prop: claims[prop]
                       for prop in important_properties if prop in claims},
            'aliases': [alias['value'] for alias in aliases.get('en', [])],
        }

    def get_related_topics(self, topic: str, limit: int = 5) -> list:
        """Return up to *limit* topics related to *topic*.

        Simplified implementation: reuses the search results beyond the
        best match (a fuller version might mine "See also" sections or
        Wikidata relationships). Each item has 'title', 'description' and
        'url' keys. Returns [] on error.
        """
        try:
            search_results = self._search_wikipedia(topic)
            if not search_results:
                return []

            # Skip index 0 (assumed exact match); keep the next `limit` hits.
            return [
                {
                    'title': result['title'],
                    'description': result.get('description', ''),
                    'url': result.get('content_urls', {}).get('desktop', {}).get('page', '')
                }
                for result in search_results[1:limit + 1]
            ]

        except Exception as e:
            print(f"Related topics error: {e}")
            return []

    def get_random_article(self, language: str = 'en') -> Optional[Dict[str, str]]:
        """Fetch a random Wikipedia article summary.

        Returns a dict with 'title', 'extract' and 'url' keys, or None on
        error.
        """
        domain = self._domain_for(language)

        try:
            url = f"https://{domain}/api/rest_v1/page/random/summary"
            response = self.session.get(url, timeout=10)
            response.raise_for_status()

            data = response.json()
            return {
                'title': data.get('title', ''),
                'extract': data.get('extract', ''),
                'url': data.get('content_urls', {}).get('desktop', {}).get('page', '')
            }

        except Exception as e:
            print(f"Random article error: {e}")
            return None