Spaces:

devyugensys
/

BizInsights_agent

Sleeping

App Files Files Community

pranav8tripathi@gmail.com committed on Oct 3, 2025

Commit

a1a3895

1 Parent(s): b44d0b9

wikimedia setup completed

Browse files

Files changed (12) hide show

.env +7 -1
app/__pycache__/config.cpython-313.pyc +0 -0
app/__pycache__/main.cpython-313.pyc +0 -0
app/config.py +5 -2
app/data_sources/__pycache__/wikimedia.cpython-313.pyc +0 -0
app/data_sources/wikimedia.py +248 -0
app/main.py +48 -0
app/models/__pycache__/schemas.cpython-313.pyc +0 -0
app/models/schemas.py +4 -0
app/services/__pycache__/search.cpython-313.pyc +0 -0
app/services/search.py +15 -29
test_wikimedia.py +97 -0

.env CHANGED Viewed

	@@ -1 +1,7 @@
1	- DEEPSEEK_API_KEY=sk-153c4decdf4e4a79995c685af7fad5c8

+DEEPSEEK_API_KEY=sk-153c4decdf4e4a79995c685af7fad5c8
+# Wikimedia API Credentials
+WIKIMEDIA_CLIENT_ID=2a9587beb7f14348fec94383ab8eb380
+WIKIMEDIA_CLIENT_SECRET=7d49f5ada620d25913c78d3cd934152eee49355a
+WIKIMEDIA_APP_NAME=BizInsights
+WIKIMEDIA_EMAIL=developer@yugensys.co  # replace with your contact email

app/__pycache__/config.cpython-313.pyc CHANGED Viewed

Binary files a/app/__pycache__/config.cpython-313.pyc and b/app/__pycache__/config.cpython-313.pyc differ

app/__pycache__/main.cpython-313.pyc CHANGED Viewed

Binary files a/app/__pycache__/main.cpython-313.pyc and b/app/__pycache__/main.cpython-313.pyc differ

app/config.py CHANGED Viewed

@@ -9,8 +9,11 @@ class Settings(BaseSettings):
     DEEPSEEK_API_KEY: str
     DEEPSEEK_API_URL: str = "https://api.deepseek.com/v1/chat/completions"
     DEEPSEEK_ENDPOINT: Optional[str] = None  # For backward compatibility
-    NEWS_API_KEY: Optional[str] = None
-    CRUNCHBASE_API_KEY: Optional[str] = None
     # Application settings
     APP_NAME: str = "RivalLens API"

     DEEPSEEK_API_KEY: str
     DEEPSEEK_API_URL: str = "https://api.deepseek.com/v1/chat/completions"
     DEEPSEEK_ENDPOINT: Optional[str] = None  # For backward compatibility
+    # Wikimedia API Settings
+    WIKIMEDIA_ACCESS_TOKEN: Optional[str] = None
+    WIKIMEDIA_APP_NAME: str = "BizInsights"
+    WIKIMEDIA_EMAIL: str = "your-email@example.com"
     # Application settings
     APP_NAME: str = "RivalLens API"

app/data_sources/__pycache__/wikimedia.cpython-313.pyc ADDED Viewed

Binary file (11.5 kB). View file

app/data_sources/wikimedia.py ADDED Viewed

	@@ -0,0 +1,248 @@

+"""Wikimedia API client for fetching data from Wikipedia and other Wikimedia projects."""
+import aiohttp
+import asyncio
+import json
+import logging
+from datetime import datetime, timezone
+from typing import Dict, Any, Optional, List, Union
+class WikimediaClient:
+    """Client for interacting with the Wikimedia API."""
+    def __init__(
+        self,
+        app_name: str = "BizInsights",
+        email: str = "developer@yugensys.co",
+        api_base_url: str = "https://en.wikipedia.org/w/api.php"
+    ) -> None:
+        """Set up the client without opening any network connection.
+        Args:
+            app_name: Application name placed at the start of the User-Agent string
+            email: Contact address embedded in the User-Agent string
+            api_base_url: MediaWiki API URL; trailing slashes are stripped
+        """
+        # Built once here and reused for every request made by this client.
+        self._user_agent = f"{app_name}/1.0 ({email}) Python/aiohttp"
+        # No session yet: the request methods create one when they first need it.
+        self._session = None
+        self.api_base_url = api_base_url.rstrip("/")
+    async def _make_api_request(
+        self,
+        params: Dict[str, Any],
+        endpoint: str = ""
+    ) -> Dict[str, Any]:
+        """Make a GET request to the MediaWiki API.
+        Args:
+            params: Query parameters for the API request. The mapping is copied
+                before defaults are applied, so the caller's dict is never modified.
+            endpoint: Optional path appended to the base URL. When empty, the
+                base URL is requested exactly as configured.
+        Returns:
+            The decoded JSON body on success; otherwise a dict with an "error"
+            key (plus "status_code" and the raw body when a response was received).
+        """
+        # Only add a path separator when there is something to append, so the
+        # default call does not request the base URL with a stray trailing "/".
+        path = endpoint.lstrip("/")
+        url = f"{self.api_base_url}/{path}" if path else self.api_base_url
+        # Caller-supplied values win over the defaults; the input dict is untouched.
+        query = {"format": "json", "formatversion": "2", **params}
+        headers = {
+            "User-Agent": self._user_agent,
+            "Accept": "application/json"
+        }
+        try:
+            if not self._session or self._session.closed:
+                self._session = aiohttp.ClientSession()
+            async with self._session.get(
+                url,
+                params=query,
+                headers=headers
+            ) as response:
+                if response.status != 200:
+                    error_text = await response.text()
+                    return {
+                        "error": f"API request failed with status {response.status}",
+                        "status_code": response.status,
+                        "response": error_text
+                    }
+                try:
+                    return await response.json()
+                except (aiohttp.ContentTypeError, ValueError) as e:
+                    # Wrong content type or malformed JSON: report it together
+                    # with the raw body instead of a generic request failure.
+                    return {
+                        "error": f"Failed to parse JSON response: {str(e)}",
+                        "status_code": response.status,
+                        "response_text": await response.text()
+                    }
+        except Exception as e:
+            # Deliberate best-effort boundary: callers inspect the "error" key
+            # rather than catching exceptions.
+            return {
+                "error": f"Request failed: {str(e)}"
+            }
+    async def _make_authenticated_request(
+        self,
+        method: str,
+        endpoint: str,
+        params: Optional[Dict[str, Any]] = None,
+        json_data: Optional[Dict[str, Any]] = None,
+        is_api: bool = False
+    ) -> Dict[str, Any]:
+        """Forward a GET request to _make_api_request; reject every other method.
+        Args:
+            method: HTTP method name, compared case-insensitively; only GET is accepted
+            endpoint: API endpoint (without base URL)
+            params: Query parameters; an empty dict is used when omitted or empty
+            json_data: Accepted for signature compatibility; not read by this method
+            is_api: Accepted for signature compatibility; not read by this method
+        Returns:
+            Dict containing the JSON response or error information
+        """
+        query = params if params else {}
+        if method.upper() == 'GET':
+            # Public read-only access goes through the shared request helper.
+            return await self._make_api_request(query, endpoint)
+        return {"error": "Only GET requests are supported for public API access"}
+    async def _process_response(self, response: aiohttp.ClientResponse, url: str) -> Dict[str, Any]:
+        """Turn an aiohttp response into a parsed JSON dict or an error dict.
+        Args:
+            response: aiohttp response object
+            url: The URL that was requested; used only as a fallback in the
+                error dict when the response object has no ``url`` attribute
+        Returns:
+            The parsed JSON body for a 200 response (``{}`` for an empty body),
+            otherwise a dict with "error", "status_code" and "url" keys plus
+            whatever part of the body could be recovered.
+        """
+        try:
+            text = await response.text()
+            # Handle rate limiting (429 Too Many Requests)
+            if response.status == 429:
+                retry_after = response.headers.get('Retry-After', '60')
+                return {
+                    "error": f"Rate limited. Please try again after {retry_after} seconds.",
+                    "status_code": 429,
+                    "url": str(response.url)
+                }
+            # Handle other error statuses
+            if response.status != 200:
+                try:
+                    error_data = json.loads(text) if text else {}
+                    return {
+                        "error": error_data.get("detail", f"API request failed with status {response.status}"),
+                        "status_code": response.status,
+                        "url": str(response.url),
+                        "response": error_data
+                    }
+                except (ValueError, AttributeError):
+                    # ValueError: the body is not valid JSON.
+                    # AttributeError: valid JSON, but not an object with .get().
+                    return {
+                        "error": f"API request failed with status {response.status}",
+                        "status_code": response.status,
+                        "url": str(response.url),
+                        "response_text": text[:1000]  # Truncate long responses
+                    }
+            # Parse successful response
+            try:
+                return json.loads(text) if text else {}
+            except json.JSONDecodeError:
+                return {
+                    "error": "Invalid JSON response from server",
+                    "status_code": response.status,
+                    "url": str(response.url),
+                    "response_text": text[:1000]  # Truncate long responses
+                }
+        except Exception as e:
+            # Last-resort boundary (e.g. reading the body failed): report the
+            # problem as data, using getattr so a broken response object cannot
+            # raise again here.
+            return {
+                "error": f"Failed to process API response: {str(e)}",
+                "status_code": getattr(response, 'status', None),
+                "url": str(getattr(response, 'url', url)),
+                "exception": str(e)
+            }
+    async def __aenter__(self):
+        """Enter ``async with``: return this client; no session is opened here."""
+        return self
+    async def __aexit__(self, exc_type, exc_val, exc_tb):
+        """Exit ``async with``: close the session via close(); exceptions are not suppressed."""
+        await self.close()
+    async def close(self):
+        """Close the HTTP session if one is open and forget it; otherwise do nothing."""
+        session = self._session
+        if not session or session.closed:
+            return
+        await session.close()
+        self._session = None
+    async def get_todays_featured_article(self) -> Dict[str, Any]:
+        """Get today's featured-content feed from English Wikipedia.
+        The request goes to the REST feed URL on en.wikipedia.org for the
+        current UTC date; ``api_base_url`` is not used by this method.
+        Returns:
+            Dict containing the featured article data or error information
+        Example response:
+            {
+                "tfa": {
+                    "type": "mainpage",
+                    "title": "Main Page",
+                    "displaytitle": "Main Page",
+                    "titles": {"canonical": "Main_Page", ...},
+                    "extract": "...",
+                    "extract_html": "<p>...</p>",
+                    "thumbnail": {"source": "..."},
+                    "originalimage": {"source": "..."},
+                    "content_urls": {...},
+                    "timestamp": "2023-01-01T00:00:00Z"
+                },
+                "mostread": {...},
+                "onthisday": [...]
+            }
+        """
+        try:
+            today = datetime.now(timezone.utc).strftime("%Y/%m/%d")
+            endpoint = f"https://en.wikipedia.org/api/rest_v1/feed/featured/{today}"
+            headers = {"User-Agent": self._user_agent}
+            if not self._session or self._session.closed:
+                self._session = aiohttp.ClientSession(headers=headers)
+            # Send the header with the request as well: the session may have been
+            # created by _make_api_request, which sets no session-level headers,
+            # and then this call would go out without the identifying User-Agent.
+            async with self._session.get(endpoint, headers=headers) as response:
+                if response.status == 200:
+                    return await response.json()
+                else:
+                    return {
+                        "error": f"Failed to fetch featured article: {response.status}",
+                        "status_code": response.status
+                    }
+        except Exception as e:
+            # Best-effort boundary: failures are reported through the "error" key.
+            return {"error": f"Failed to get today's featured article: {str(e)}"}
+    async def search_wikipedia(self, query: str, limit: int = 10) -> Dict[str, Any]:
+        """Search Wikipedia articles.
+        Args:
+            query: Search query string
+            limit: Maximum number of results to return (default: 10). Values
+                outside 1..50 are clamped into that range before the request.
+        Returns:
+            Dict containing search results or error information
+        """
+        # Clamp on both sides: the upper bound keeps the cap this client has
+        # always applied, the lower bound stops zero or negative limits from
+        # being sent to the API.
+        bounded_limit = max(1, min(limit, 50))
+        params = {
+            "action": "query",
+            "list": "search",
+            "srsearch": query,
+            "format": "json",
+            "formatversion": "2",
+            "srlimit": bounded_limit,
+            "srinfo": "totalhits"
+        }
+        return await self._make_api_request(params)

app/main.py CHANGED Viewed

@@ -18,6 +18,7 @@ logger = configure_logging()
 # Now import other modules
 from app.models.schemas import UserPayload, ReportResponse, CompanyData, CompetitorInsight
 from app.services.llm_client import llm
 from app.services.search import search_adapter
 from app.utils import pdf_generator, charts
@@ -135,6 +136,53 @@ async def root():
         "version": "1.0.0"
     }
 @app.post("/api/v1/analyze", response_model=ReportResponse)
 async def analyze_competitors(
     payload: UserPayload,

 # Now import other modules
 from app.models.schemas import UserPayload, ReportResponse, CompanyData, CompetitorInsight
+from pydantic import BaseModel
 from app.services.llm_client import llm
 from app.services.search import search_adapter
 from app.utils import pdf_generator, charts
         "version": "1.0.0"
     }
+# Request body for POST /api/v1/get-company-details.
+class CompanyDetailsRequest(BaseModel):
+    # Company name to look up; passed to search_adapter.enrich_company by the handler.
+    company_asked: str
+# Response body for POST /api/v1/get-company-details; every field is a required string.
+class CompanyDetailsResponse(BaseModel):
+    name: str
+    location: str
+    industry: str
+    ceo: str
+    founded: str
+    # Multi-line, emoji-decorated summary that get_company_details builds from the fields above.
+    details: str
+@app.post("/api/v1/get-company-details", response_model=CompanyDetailsResponse)
+async def get_company_details(request: CompanyDetailsRequest):
+    """
+    Get detailed information about a company.
+    Looks the company up through the search adapter and substitutes a
+    "not available" placeholder for every field the adapter leaves empty.
+    Any failure is reported as an HTTP 500 error.
+    """
+    try:
+        # Get company details using the search adapter
+        company_data = await search_adapter.enrich_company(
+            request.company_asked,
+            citation_depth=1,
+            geography=None
+        )
+        # Extract relevant information, falling back to placeholders so the
+        # all-string response model always validates
+        name = company_data.name or request.company_asked
+        location = company_data.location or "Location not available"
+        industry = company_data.industry or "Industry not available"
+        ceo = company_data.ceo or "CEO information not available"
+        founded = company_data.founded or "Founding year not available"
+        # Format the response
+        return {
+            "name": name,
+            "location": location,
+            "industry": industry,
+            "ceo": ceo,
+            "founded": founded,
+            "details": f"🔍 Found: {name}\n📍 {location}\n🏢 {industry}\n👤 {ceo}\n📅 Founded {founded}"
+        }
+    except Exception as e:
+        # logger.exception records the traceback, which logger.error dropped.
+        logger.exception(f"Error fetching company details: {str(e)}")
+        # Chain the original error so the root cause stays attached.
+        raise HTTPException(
+            status_code=500,
+            detail=f"Failed to fetch company details: {str(e)}"
+        ) from e
 @app.post("/api/v1/analyze", response_model=ReportResponse)
 async def analyze_competitors(
     payload: UserPayload,

app/models/__pycache__/schemas.cpython-313.pyc CHANGED Viewed

Binary files a/app/models/__pycache__/schemas.cpython-313.pyc and b/app/models/__pycache__/schemas.cpython-313.pyc differ

app/models/schemas.py CHANGED Viewed

@@ -47,6 +47,10 @@ class CompanyData(BaseModel):
     name: str
     website: Optional[str] = None
     description: Optional[str] = None
     metrics: Dict[str, Any] = {}
     notes: List[str] = []
     sources: List[Dict[str, Any]] = []

     name: str
     website: Optional[str] = None
     description: Optional[str] = None
+    location: Optional[str] = None
+    industry: Optional[str] = None
+    ceo: Optional[str] = None
+    founded: Optional[str] = None
     metrics: Dict[str, Any] = {}
     notes: List[str] = []
     sources: List[Dict[str, Any]] = []

app/services/__pycache__/search.cpython-313.pyc CHANGED Viewed

Binary files a/app/services/__pycache__/search.cpython-313.pyc and b/app/services/__pycache__/search.cpython-313.pyc differ

app/services/search.py CHANGED Viewed

@@ -1,16 +1,12 @@
 """Search functionality for company data and competitor discovery."""
-from typing import List, Optional, Dict, Any
 from app.models.schemas import CompanyData
-from app.config import settings
 class SearchAdapter:
     """Provides company discovery and enrichment functions."""
-    def __init__(self,
-                 news_api_key: Optional[str] = settings.NEWS_API_KEY,
-                 crunchbase_key: Optional[str] = settings.CRUNCHBASE_API_KEY):
-        self.news_api_key = news_api_key
-        self.crunchbase_key = crunchbase_key
     async def discover_competitors(self,
                                  business_name: str,
@@ -18,11 +14,7 @@ class SearchAdapter:
                                  geography: Optional[str] = None,
                                  limit: int = 5) -> List[str]:
         """Discover potential competitors for a business."""
-        if self.crunchbase_key:
-            # Placeholder for actual Crunchbase API integration
-            pass
-        # Fallback mock strategy
         keywords = business_desc.lower()
         if "hr" in keywords or "human resources" in keywords or "payroll" in keywords:
             candidates = ["BambooHR", "Gusto", "Rippling", "Zoho People", "UKG"]
@@ -39,25 +31,19 @@ class SearchAdapter:
                            company_name: str,
                            citation_depth: int = 3,
                            geography: Optional[str] = None) -> CompanyData:
-        """Gather structured and unstructured info for a company."""
-        # Mock implementation - replace with actual API calls
         return CompanyData(
             name=company_name,
-            website=f"https://{company_name.lower().replace(' ', '')}.example.com",
-            description=f"A leading company in their industry, {company_name} provides excellent services.",
-            metrics={
-                "employees": 1000,
-                "revenue": "$10M - $50M",
-                "founded": 2010
-            },
-            notes=[
-                f"{company_name} recently expanded to new markets.",
-                "Strong social media presence with growing engagement."
-            ],
-            sources=[
-                {"type": "web", "url": f"https://{company_name.lower().replace(' ', '')}.com/about"},
-                {"type": "news", "title": f"{company_name} announces new product line"}
-            ][:citation_depth]
         )
 # Singleton instance

 """Search functionality for company data and competitor discovery."""
+from typing import List, Optional
 from app.models.schemas import CompanyData
 class SearchAdapter:
     """Provides company discovery and enrichment functions."""
+    def __init__(self):
+        pass
     async def discover_competitors(self,
                                  business_name: str,
                                  geography: Optional[str] = None,
                                  limit: int = 5) -> List[str]:
         """Discover potential competitors for a business."""
+        # Mock implementation for competitor discovery
         keywords = business_desc.lower()
         if "hr" in keywords or "human resources" in keywords or "payroll" in keywords:
             candidates = ["BambooHR", "Gusto", "Rippling", "Zoho People", "UKG"]
                            company_name: str,
                            citation_depth: int = 3,
                            geography: Optional[str] = None) -> CompanyData:
+        """Gather structured and unstructured info for a company.
+        Args:
+            company_name: Name of the company to look up
+            citation_depth: Number of sources to include (currently not used)
+            geography: Optional geographic filter (currently not used)
+        Returns:
+            CompanyData: Company information with only real data (currently just the name)
+        """
         return CompanyData(
             name=company_name,
+            # All other fields will use their default None/empty values
         )
 # Singleton instance

test_wikimedia.py ADDED Viewed

	@@ -0,0 +1,97 @@

+"""Test script for Wikimedia API integration."""
+import asyncio
+import os
+import sys
+from dotenv import load_dotenv
+# Add the project root to the Python path
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+from app.data_sources.wikimedia import WikimediaClient
+from app.config import settings
+# Load environment variables
+load_dotenv()
+async def test_wikimedia_integration():
+    """Exercise the Wikimedia client against the live API and print the results.
+    Runs two checks in order: today's featured article, then a three-result
+    search for "Python programming". Errors are printed, not raised, and the
+    client's HTTP session is always closed before returning.
+    """
+    # Imported once here instead of inside the result loop / error handlers.
+    import re
+    import traceback
+    # Get app name and email from settings, falling back to the environment or defaults
+    app_name = getattr(settings, 'WIKIMEDIA_APP_NAME', os.environ.get('WIKIMEDIA_APP_NAME', 'BizInsights'))
+    email = getattr(settings, 'WIKIMEDIA_EMAIL', os.environ.get('WIKIMEDIA_EMAIL', 'developer@yugensys.co'))
+    print("Testing Wikimedia API integration...")
+    # Debug output
+    print("\nDebug Info:")
+    print(f"Using App Name: {app_name}")
+    print(f"Using Email: {email}")
+    print()
+    # Initialize client with just app name and email
+    client = WikimediaClient(app_name=app_name, email=email)
+    try:
+        # Test getting today's featured article
+        print("\n1. Fetching today's featured article...")
+        featured = await client.get_todays_featured_article()
+        if "error" in featured:
+            print(f"Error: {featured['error']}")
+        else:
+            title = featured.get('tfa', {}).get('titles', {}).get('normalized', 'N/A')
+            print(f"✅ Success! Today's featured article: {title}")
+            extract = featured.get('tfa', {}).get('extract', '')
+            if extract:
+                print(f"📝 Extract: {extract[:200]}...")
+            # Print additional metadata if available
+            if 'tfa' in featured and 'originalimage' in featured['tfa']:
+                print(f"🖼️ Image: {featured['tfa']['originalimage'].get('source', 'Not available')}")
+        # Test searching Wikipedia
+        print("\n2. Searching Wikipedia for 'Python programming'...")
+        search_term = "Python programming"
+        search_results = await client.search_wikipedia(search_term, limit=3)
+        if "error" in search_results:
+            print(f"Error: {search_results['error']}")
+        else:
+            results = search_results.get('query', {}).get('search', [])
+            if not results:
+                print("No results found. The search API might be rate limited.")
+            else:
+                print(f"✅ Found {len(results)} results for '{search_term}':")
+                for i, result in enumerate(results, 1):
+                    print(f"   {i}. {result.get('title')}")
+                    if 'snippet' in result:
+                        # Clean up HTML tags from the snippet
+                        snippet = re.sub('<[^<]+?>', '', result['snippet'])
+                        print(f"      {snippet}...")
+                    print()  # Add a blank line between results
+    except Exception as e:
+        print(f"\n❌ An error occurred during testing: {str(e)}")
+        traceback.print_exc()
+    finally:
+        # Release the aiohttp session the client opened while making requests.
+        await client.close()
+if __name__ == "__main__":
+    # Script entry point: print the banner, run the async test, report failures.
+    for banner_line in (
+        "=== Wikimedia API Integration Test ===\n",
+        "This script will test the Wikimedia API integration.",
+        "No authentication is required for read-only access.\n",
+    ):
+        print(banner_line)
+    try:
+        asyncio.run(test_wikimedia_integration())
+    except KeyboardInterrupt:
+        # Ctrl-C is a normal way to stop the script, so no traceback is shown.
+        print("\nTest interrupted by user.")
+    except Exception as e:
+        import traceback
+        print(f"\n❌ An unexpected error occurred: {str(e)}")
+        traceback.print_exc()
+    print("\n=== Test completed ===")