pranav8tripathi@gmail.com committed on
Commit
a1a3895
·
1 Parent(s): b44d0b9

wikimedia setup completed

Browse files
.env CHANGED
@@ -1 +1,7 @@
1
- DEEPSEEK_API_KEY=sk-153c4decdf4e4a79995c685af7fad5c8
 
 
 
 
 
 
 
1
+ DEEPSEEK_API_KEY=sk-153c4decdf4e4a79995c685af7fad5c8
2
+
3
+ # Wikimedia API Credentials
4
+ WIKIMEDIA_CLIENT_ID=2a9587beb7f14348fec94383ab8eb380
5
+ WIKIMEDIA_CLIENT_SECRET=7d49f5ada620d25913c78d3cd934152eee49355a
6
+ WIKIMEDIA_APP_NAME=BizInsights
7
+ WIKIMEDIA_EMAIL=developer@yugensys.co # replace with your contact email
app/__pycache__/config.cpython-313.pyc CHANGED
Binary files a/app/__pycache__/config.cpython-313.pyc and b/app/__pycache__/config.cpython-313.pyc differ
 
app/__pycache__/main.cpython-313.pyc CHANGED
Binary files a/app/__pycache__/main.cpython-313.pyc and b/app/__pycache__/main.cpython-313.pyc differ
 
app/config.py CHANGED
@@ -9,8 +9,11 @@ class Settings(BaseSettings):
9
  DEEPSEEK_API_KEY: str
10
  DEEPSEEK_API_URL: str = "https://api.deepseek.com/v1/chat/completions"
11
  DEEPSEEK_ENDPOINT: Optional[str] = None # For backward compatibility
12
- NEWS_API_KEY: Optional[str] = None
13
- CRUNCHBASE_API_KEY: Optional[str] = None
 
 
 
14
 
15
  # Application settings
16
  APP_NAME: str = "RivalLens API"
 
9
  DEEPSEEK_API_KEY: str
10
  DEEPSEEK_API_URL: str = "https://api.deepseek.com/v1/chat/completions"
11
  DEEPSEEK_ENDPOINT: Optional[str] = None # For backward compatibility
12
+
13
+ # Wikimedia API Settings
14
+ WIKIMEDIA_ACCESS_TOKEN: Optional[str] = None
15
+ WIKIMEDIA_APP_NAME: str = "BizInsights"
16
+ WIKIMEDIA_EMAIL: str = "your-email@example.com"
17
 
18
  # Application settings
19
  APP_NAME: str = "RivalLens API"
app/data_sources/__pycache__/wikimedia.cpython-313.pyc ADDED
Binary file (11.5 kB). View file
 
app/data_sources/wikimedia.py ADDED
@@ -0,0 +1,248 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Wikimedia API client for fetching data from Wikipedia and other Wikimedia projects."""
2
+ import aiohttp
3
+ import asyncio
4
+ import json
5
+ import logging
6
+ from datetime import datetime, timezone
7
+ from typing import Dict, Any, Optional, List, Union
8
+
9
+ class WikimediaClient:
10
+ """Client for interacting with the Wikimedia API."""
11
+
12
+ def __init__(
13
+ self,
14
+ app_name: str = "BizInsights",
15
+ email: str = "developer@yugensys.co",
16
+ api_base_url: str = "https://en.wikipedia.org/w/api.php"
17
+ ) -> None:
18
+ """Initialize the Wikimedia client.
19
+
20
+ Args:
21
+ app_name: Name of your application (for User-Agent header)
22
+ email: Your email address (for contact in case of issues)
23
+ api_base_url: Base URL for the MediaWiki API
24
+ """
25
+ self.api_base_url = api_base_url.rstrip("/")
26
+ self._session = None
27
+ self._user_agent = f"{app_name}/1.0 ({email}) Python/aiohttp"
28
+
29
+ async def _make_api_request(
30
+ self,
31
+ params: Dict[str, Any],
32
+ endpoint: str = ""
33
+ ) -> Dict[str, Any]:
34
+ """Make a request to the MediaWiki API.
35
+
36
+ Args:
37
+ params: Query parameters for the API request
38
+ endpoint: API endpoint (defaults to the main API endpoint)
39
+
40
+ Returns:
41
+ Dict containing the JSON response or error information
42
+ """
43
+ url = f"{self.api_base_url}/{endpoint.lstrip('/')}"
44
+
45
+ # Set default parameters
46
+ params.setdefault("format", "json")
47
+ params.setdefault("formatversion", "2")
48
+
49
+ headers = {
50
+ "User-Agent": self._user_agent,
51
+ "Accept": "application/json"
52
+ }
53
+
54
+ try:
55
+ if not self._session or self._session.closed:
56
+ self._session = aiohttp.ClientSession()
57
+
58
+ async with self._session.get(
59
+ url,
60
+ params=params,
61
+ headers=headers
62
+ ) as response:
63
+ if response.status != 200:
64
+ error_text = await response.text()
65
+ return {
66
+ "error": f"API request failed with status {response.status}",
67
+ "status_code": response.status,
68
+ "response": error_text
69
+ }
70
+
71
+ try:
72
+ return await response.json()
73
+ except Exception as e:
74
+ return {
75
+ "error": f"Failed to parse JSON response: {str(e)}",
76
+ "status_code": response.status,
77
+ "response_text": await response.text()
78
+ }
79
+
80
+ except Exception as e:
81
+ return {
82
+ "error": f"Request failed: {str(e)}"
83
+ }
84
+
85
+ async def _make_authenticated_request(
86
+ self,
87
+ method: str,
88
+ endpoint: str,
89
+ params: Optional[Dict[str, Any]] = None,
90
+ json_data: Optional[Dict[str, Any]] = None,
91
+ is_api: bool = False
92
+ ) -> Dict[str, Any]:
93
+ """Make a request to the Wikimedia API.
94
+
95
+ Args:
96
+ method: HTTP method (GET, POST, etc.) - Only GET is supported for public APIs
97
+ endpoint: API endpoint (without base URL)
98
+ params: Query parameters
99
+ json_data: JSON payload (not used for GET requests)
100
+ is_api: Whether to use the MediaWiki API base URL
101
+
102
+ Returns:
103
+ Dict containing the JSON response or error information
104
+ """
105
+ # For public read-only access, we only support GET requests
106
+ if method.upper() != 'GET':
107
+ return {"error": "Only GET requests are supported for public API access"}
108
+
109
+ # Use the _make_api_request for all API calls
110
+ return await self._make_api_request(params or {}, endpoint)
111
+
112
+ async def _process_response(self, response: aiohttp.ClientResponse, url: str) -> Dict[str, Any]:
113
+ """Process the API response.
114
+
115
+ Args:
116
+ response: aiohttp response object
117
+ url: The URL that was requested
118
+
119
+ Returns:
120
+ Dict containing the parsed JSON response or error information
121
+ """
122
+ try:
123
+ text = await response.text()
124
+
125
+ # Handle rate limiting (429 Too Many Requests)
126
+ if response.status == 429:
127
+ retry_after = response.headers.get('Retry-After', '60')
128
+ return {
129
+ "error": f"Rate limited. Please try again after {retry_after} seconds.",
130
+ "status_code": 429,
131
+ "url": str(response.url)
132
+ }
133
+
134
+ # Handle other error statuses
135
+ if response.status != 200:
136
+ try:
137
+ error_data = json.loads(text) if text else {}
138
+ return {
139
+ "error": error_data.get("detail", f"API request failed with status {response.status}"),
140
+ "status_code": response.status,
141
+ "url": str(response.url),
142
+ "response": error_data
143
+ }
144
+ except:
145
+ return {
146
+ "error": f"API request failed with status {response.status}",
147
+ "status_code": response.status,
148
+ "url": str(response.url),
149
+ "response_text": text[:1000] # Truncate long responses
150
+ }
151
+
152
+ # Parse successful response
153
+ try:
154
+ return json.loads(text) if text else {}
155
+ except json.JSONDecodeError:
156
+ return {
157
+ "error": "Invalid JSON response from server",
158
+ "status_code": response.status,
159
+ "url": str(response.url),
160
+ "response_text": text[:1000] # Truncate long responses
161
+ }
162
+
163
+ except Exception as e:
164
+ return {
165
+ "error": f"Failed to process API response: {str(e)}",
166
+ "status_code": getattr(response, 'status', None),
167
+ "url": str(getattr(response, 'url', url)),
168
+ "exception": str(e)
169
+ }
170
+
171
+ async def __aenter__(self):
172
+ """Async context manager entry."""
173
+ return self
174
+
175
+ async def __aexit__(self, exc_type, exc_val, exc_tb):
176
+ """Async context manager exit - close the session if it exists."""
177
+ await self.close()
178
+
179
+ async def close(self):
180
+ """Close the client session."""
181
+ if self._session and not self._session.closed:
182
+ await self._session.close()
183
+ self._session = None
184
+
185
+ async def get_todays_featured_article(self) -> Dict[str, Any]:
186
+ """Get today's featured article from Wikipedia.
187
+
188
+ Returns:
189
+ Dict containing the featured article data or error information
190
+
191
+ Example response:
192
+ {
193
+ "tfa": {
194
+ "type": "mainpage",
195
+ "title": "Main Page",
196
+ "displaytitle": "Main Page",
197
+ "titles": {"canonical": "Main_Page", ...},
198
+ "extract": "...",
199
+ "extract_html": "<p>...</p>",
200
+ "thumbnail": {"source": "..."},
201
+ "originalimage": {"source": "..."},
202
+ "content_urls": {...},
203
+ "timestamp": "2023-01-01T00:00:00Z"
204
+ },
205
+ "mostread": {...},
206
+ "onthisday": [...]
207
+ }
208
+ """
209
+ try:
210
+ today = datetime.now(timezone.utc).strftime("%Y/%m/%d")
211
+ endpoint = f"https://en.wikipedia.org/api/rest_v1/feed/featured/{today}"
212
+
213
+ if not self._session or self._session.closed:
214
+ self._session = aiohttp.ClientSession(headers={"User-Agent": self._user_agent})
215
+
216
+ async with self._session.get(endpoint) as response:
217
+ if response.status == 200:
218
+ return await response.json()
219
+ else:
220
+ return {
221
+ "error": f"Failed to fetch featured article: {response.status}",
222
+ "status_code": response.status
223
+ }
224
+
225
+ except Exception as e:
226
+ return {"error": f"Failed to get today's featured article: {str(e)}"}
227
+
228
+ async def search_wikipedia(self, query: str, limit: int = 10) -> Dict[str, Any]:
229
+ """Search Wikipedia articles.
230
+
231
+ Args:
232
+ query: Search query string
233
+ limit: Maximum number of results to return (default: 10, max: 50)
234
+
235
+ Returns:
236
+ Dict containing search results or error information
237
+ """
238
+ params = {
239
+ "action": "query",
240
+ "list": "search",
241
+ "srsearch": query,
242
+ "format": "json",
243
+ "formatversion": "2",
244
+ "srlimit": min(limit, 50), # Max 50 results per API docs
245
+ "srinfo": "totalhits"
246
+ }
247
+
248
+ return await self._make_api_request(params)
app/main.py CHANGED
@@ -18,6 +18,7 @@ logger = configure_logging()
18
 
19
  # Now import other modules
20
  from app.models.schemas import UserPayload, ReportResponse, CompanyData, CompetitorInsight
 
21
  from app.services.llm_client import llm
22
  from app.services.search import search_adapter
23
  from app.utils import pdf_generator, charts
@@ -135,6 +136,53 @@ async def root():
135
  "version": "1.0.0"
136
  }
137
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
138
  @app.post("/api/v1/analyze", response_model=ReportResponse)
139
  async def analyze_competitors(
140
  payload: UserPayload,
 
18
 
19
  # Now import other modules
20
  from app.models.schemas import UserPayload, ReportResponse, CompanyData, CompetitorInsight
21
+ from pydantic import BaseModel
22
  from app.services.llm_client import llm
23
  from app.services.search import search_adapter
24
  from app.utils import pdf_generator, charts
 
136
  "version": "1.0.0"
137
  }
138
 
139
+ class CompanyDetailsRequest(BaseModel):
140
+ company_asked: str
141
+
142
+ class CompanyDetailsResponse(BaseModel):
143
+ name: str
144
+ location: str
145
+ industry: str
146
+ ceo: str
147
+ founded: str
148
+ details: str
149
+
150
+ @app.post("/api/v1/get-company-details", response_model=CompanyDetailsResponse)
151
+ async def get_company_details(request: CompanyDetailsRequest):
152
+ """
153
+ Get detailed information about a company.
154
+ """
155
+ try:
156
+ # Get company details using the search adapter
157
+ company_data = await search_adapter.enrich_company(
158
+ request.company_asked,
159
+ citation_depth=1,
160
+ geography=None
161
+ )
162
+
163
+ # Extract relevant information
164
+ name = company_data.name or request.company_asked
165
+ location = company_data.location or "Location not available"
166
+ industry = company_data.industry or "Industry not available"
167
+ ceo = company_data.ceo or "CEO information not available"
168
+ founded = company_data.founded or "Founding year not available"
169
+
170
+ # Format the response
171
+ return {
172
+ "name": name,
173
+ "location": location,
174
+ "industry": industry,
175
+ "ceo": ceo,
176
+ "founded": founded,
177
+ "details": f"🔍 Found: {name}\n📍 {location}\n🏢 {industry}\n👤 {ceo}\n📅 Founded {founded}"
178
+ }
179
+ except Exception as e:
180
+ logger.error(f"Error fetching company details: {str(e)}")
181
+ raise HTTPException(
182
+ status_code=500,
183
+ detail=f"Failed to fetch company details: {str(e)}"
184
+ )
185
+
186
  @app.post("/api/v1/analyze", response_model=ReportResponse)
187
  async def analyze_competitors(
188
  payload: UserPayload,
app/models/__pycache__/schemas.cpython-313.pyc CHANGED
Binary files a/app/models/__pycache__/schemas.cpython-313.pyc and b/app/models/__pycache__/schemas.cpython-313.pyc differ
 
app/models/schemas.py CHANGED
@@ -47,6 +47,10 @@ class CompanyData(BaseModel):
47
  name: str
48
  website: Optional[str] = None
49
  description: Optional[str] = None
 
 
 
 
50
  metrics: Dict[str, Any] = {}
51
  notes: List[str] = []
52
  sources: List[Dict[str, Any]] = []
 
47
  name: str
48
  website: Optional[str] = None
49
  description: Optional[str] = None
50
+ location: Optional[str] = None
51
+ industry: Optional[str] = None
52
+ ceo: Optional[str] = None
53
+ founded: Optional[str] = None
54
  metrics: Dict[str, Any] = {}
55
  notes: List[str] = []
56
  sources: List[Dict[str, Any]] = []
app/services/__pycache__/search.cpython-313.pyc CHANGED
Binary files a/app/services/__pycache__/search.cpython-313.pyc and b/app/services/__pycache__/search.cpython-313.pyc differ
 
app/services/search.py CHANGED
@@ -1,16 +1,12 @@
1
  """Search functionality for company data and competitor discovery."""
2
- from typing import List, Optional, Dict, Any
3
  from app.models.schemas import CompanyData
4
- from app.config import settings
5
 
6
  class SearchAdapter:
7
  """Provides company discovery and enrichment functions."""
8
 
9
- def __init__(self,
10
- news_api_key: Optional[str] = settings.NEWS_API_KEY,
11
- crunchbase_key: Optional[str] = settings.CRUNCHBASE_API_KEY):
12
- self.news_api_key = news_api_key
13
- self.crunchbase_key = crunchbase_key
14
 
15
  async def discover_competitors(self,
16
  business_name: str,
@@ -18,11 +14,7 @@ class SearchAdapter:
18
  geography: Optional[str] = None,
19
  limit: int = 5) -> List[str]:
20
  """Discover potential competitors for a business."""
21
- if self.crunchbase_key:
22
- # Placeholder for actual Crunchbase API integration
23
- pass
24
-
25
- # Fallback mock strategy
26
  keywords = business_desc.lower()
27
  if "hr" in keywords or "human resources" in keywords or "payroll" in keywords:
28
  candidates = ["BambooHR", "Gusto", "Rippling", "Zoho People", "UKG"]
@@ -39,25 +31,19 @@ class SearchAdapter:
39
  company_name: str,
40
  citation_depth: int = 3,
41
  geography: Optional[str] = None) -> CompanyData:
42
- """Gather structured and unstructured info for a company."""
43
- # Mock implementation - replace with actual API calls
 
 
 
 
 
 
 
 
44
  return CompanyData(
45
  name=company_name,
46
- website=f"https://{company_name.lower().replace(' ', '')}.example.com",
47
- description=f"A leading company in their industry, {company_name} provides excellent services.",
48
- metrics={
49
- "employees": 1000,
50
- "revenue": "$10M - $50M",
51
- "founded": 2010
52
- },
53
- notes=[
54
- f"{company_name} recently expanded to new markets.",
55
- "Strong social media presence with growing engagement."
56
- ],
57
- sources=[
58
- {"type": "web", "url": f"https://{company_name.lower().replace(' ', '')}.com/about"},
59
- {"type": "news", "title": f"{company_name} announces new product line"}
60
- ][:citation_depth]
61
  )
62
 
63
  # Singleton instance
 
1
  """Search functionality for company data and competitor discovery."""
2
+ from typing import List, Optional
3
  from app.models.schemas import CompanyData
 
4
 
5
  class SearchAdapter:
6
  """Provides company discovery and enrichment functions."""
7
 
8
+ def __init__(self):
9
+ pass
 
 
 
10
 
11
  async def discover_competitors(self,
12
  business_name: str,
 
14
  geography: Optional[str] = None,
15
  limit: int = 5) -> List[str]:
16
  """Discover potential competitors for a business."""
17
+ # Mock implementation for competitor discovery
 
 
 
 
18
  keywords = business_desc.lower()
19
  if "hr" in keywords or "human resources" in keywords or "payroll" in keywords:
20
  candidates = ["BambooHR", "Gusto", "Rippling", "Zoho People", "UKG"]
 
31
  company_name: str,
32
  citation_depth: int = 3,
33
  geography: Optional[str] = None) -> CompanyData:
34
+ """Gather structured and unstructured info for a company.
35
+
36
+ Args:
37
+ company_name: Name of the company to look up
38
+ citation_depth: Number of sources to include (currently not used)
39
+ geography: Optional geographic filter (currently not used)
40
+
41
+ Returns:
42
+ CompanyData: Company information with only real data (currently just the name)
43
+ """
44
  return CompanyData(
45
  name=company_name,
46
+ # All other fields will use their default None/empty values
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  )
48
 
49
  # Singleton instance
test_wikimedia.py ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Test script for Wikimedia API integration."""
2
+ import asyncio
3
+ import os
4
+ import sys
5
+ from dotenv import load_dotenv
6
+
7
+ # Add the project root to the Python path
8
+ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
9
+
10
+ from app.data_sources.wikimedia import WikimediaClient
11
+ from app.config import settings
12
+
13
+ # Load environment variables
14
+ load_dotenv()
15
+
16
+ async def test_wikimedia_integration():
17
+ """Test Wikimedia API integration."""
18
+ # Get app name and email from environment or use defaults
19
+ app_name = getattr(settings, 'WIKIMEDIA_APP_NAME', os.environ.get('WIKIMEDIA_APP_NAME', 'BizInsights'))
20
+ email = getattr(settings, 'WIKIMEDIA_EMAIL', os.environ.get('WIKIMEDIA_EMAIL', 'developer@yugensys.co'))
21
+
22
+ print("Testing Wikimedia API integration...")
23
+
24
+ # Debug output
25
+ print("\nDebug Info:")
26
+ print(f"Using App Name: {app_name}")
27
+ print(f"Using Email: {email}")
28
+ print()
29
+
30
+ # Initialize client with just app name and email
31
+ client = WikimediaClient(app_name=app_name, email=email)
32
+
33
+ try:
34
+ try:
35
+ # Test getting today's featured article
36
+ print("\n1. Fetching today's featured article...")
37
+ featured = await client.get_todays_featured_article()
38
+
39
+ if "error" in featured:
40
+ print(f"Error: {featured['error']}")
41
+ else:
42
+ title = featured.get('tfa', {}).get('titles', {}).get('normalized', 'N/A')
43
+ print(f"✅ Success! Today's featured article: {title}")
44
+ extract = featured.get('tfa', {}).get('extract', '')
45
+ if extract:
46
+ print(f"📝 Extract: {extract[:200]}...")
47
+
48
+ # Print additional metadata if available
49
+ if 'tfa' in featured and 'originalimage' in featured['tfa']:
50
+ print(f"🖼️ Image: {featured['tfa']['originalimage'].get('source', 'Not available')}")
51
+
52
+ # Test searching Wikipedia
53
+ print("\n2. Searching Wikipedia for 'Python programming'...")
54
+ search_term = "Python programming"
55
+ search_results = await client.search_wikipedia(search_term, limit=3)
56
+
57
+ if "error" in search_results:
58
+ print(f"Error: {search_results['error']}")
59
+ else:
60
+ results = search_results.get('query', {}).get('search', [])
61
+ if not results:
62
+ print("No results found. The search API might be rate limited.")
63
+ else:
64
+ print(f"✅ Found {len(results)} results for '{search_term}':")
65
+ for i, result in enumerate(results, 1):
66
+ print(f" {i}. {result.get('title')}")
67
+ if 'snippet' in result:
68
+ # Clean up HTML tags from the snippet
69
+ import re
70
+ snippet = re.sub('<[^<]+?>', '', result['snippet'])
71
+ print(f" {snippet}...")
72
+ print() # Add a blank line between results
73
+ except Exception as e:
74
+ print(f"\n❌ An error occurred during testing: {str(e)}")
75
+ import traceback
76
+ traceback.print_exc()
77
+
78
+ except Exception as e:
79
+ print(f"Error during Wikimedia API test: {str(e)}")
80
+ import traceback
81
+ traceback.print_exc()
82
+
83
+ if __name__ == "__main__":
84
+ print("=== Wikimedia API Integration Test ===\n")
85
+ print("This script will test the Wikimedia API integration.")
86
+ print("No authentication is required for read-only access.\n")
87
+
88
+ try:
89
+ asyncio.run(test_wikimedia_integration())
90
+ except KeyboardInterrupt:
91
+ print("\nTest interrupted by user.")
92
+ except Exception as e:
93
+ print(f"\n❌ An unexpected error occurred: {str(e)}")
94
+ import traceback
95
+ traceback.print_exc()
96
+
97
+ print("\n=== Test completed ===")