"""
DBpedia Integration for Autocomplete and Structured Data

DBpedia extracts structured "triples" from Wikipedia infoboxes.
Every Wikipedia page becomes a "resource" with structured data.

LOOKUP API: http://lookup.dbpedia.org/api/search
SPARQL ENDPOINT: https://dbpedia.org/sparql

KEY ADVANTAGES:
✅ Completely FREE - no API key required
✅ Perfect for autocomplete/type-ahead - Lookup API is designed for this
✅ Structured data from Wikipedia - millions of resources
✅ Instant access to Mayor, population, school district info
✅ Rich context for search results

USE CASES FOR CIVIC ENGAGEMENT:
- Autocomplete in search box (cities, people, organizations)
- Type-ahead suggestions
- Structured data for entities (mayor, population, etc.)
- Linking Wikipedia pages to structured data
- Enriching search results with context

EXAMPLE QUERIES:
- "Tuscaloosa" → Get Mayor, population, school district
- "School Board" → Find all school boards
- "Alabama cities" → Get all cities in Alabama
- Person name → Get positions, affiliations

API DOCUMENTATION:
- Lookup API: http://lookup.dbpedia.org/api/doc/
- SPARQL: https://dbpedia.org/sparql
- Examples: https://wiki.dbpedia.org/develop/datasets

USAGE:
    from discovery.dbpedia_integration import DBpediaLookup
    
    dbpedia = DBpediaLookup()
    
    # Autocomplete search
    results = await dbpedia.search("Tuscaloosa", max_results=10)
    
    # Get detailed info about a resource
    info = await dbpedia.get_resource_info("Tuscaloosa,_Alabama")
    
    # Search for specific types (cities, people, organizations)
    cities = await dbpedia.search_by_type("Alabama", type_filter="Place")
"""
import asyncio
from typing import List, Dict, Optional
from datetime import datetime
from pathlib import Path
import httpx
from loguru import logger

try:
    from pyspark.sql import SparkSession
    from config.settings import settings
    SPARK_AVAILABLE = True
except ImportError:
    SPARK_AVAILABLE = False
    settings = None


class DBpediaLookup:
    """
    Query DBpedia for autocomplete and structured data.

    Two DBpedia services are used:

    * the Lookup API (``LOOKUP_API``) for keyword / type-ahead search, and
    * the SPARQL endpoint (``SPARQL_ENDPOINT``) for the properties of one
      resource.

    Each network method opens a short-lived ``httpx.AsyncClient``; any failure
    is logged and then re-raised to the caller.
    """

    LOOKUP_API = "http://lookup.dbpedia.org/api/search"
    SPARQL_ENDPOINT = "https://dbpedia.org/sparql"

    # Common DBpedia ontology classes
    CLASSES = {
        "place": "Place",
        "city": "City",
        "person": "Person",
        "organization": "Organisation",
        "government": "GovernmentAgency",
        "school": "School",
        "politician": "Politician",
    }

    # Characters the SPARQL grammar forbids inside an IRI reference (<...>),
    # in addition to every code point up to and including U+0020 (space).
    _IRI_FORBIDDEN = frozenset('<>"{}|^`\\')

    def __init__(self, cache_dir: str = "data/cache/dbpedia"):
        """
        Initialize DBpedia lookup client.

        Args:
            cache_dir: Directory used by ``save_to_json``; created (with
                parents) if it does not exist yet.
        """
        self.cache_dir = Path(cache_dir)
        self.cache_dir.mkdir(parents=True, exist_ok=True)

    # ------------------------------------------------------------------
    # Pure helpers (no network, no logging)
    # ------------------------------------------------------------------

    @staticmethod
    def _utc_timestamp() -> str:
        """Return the current time as a timezone-aware UTC ISO-8601 string."""
        # Local import: the module itself only imports `datetime`.
        from datetime import timezone

        # datetime.utcnow() is deprecated and yields a naive value; an aware
        # timestamp carries its "+00:00" offset and is therefore unambiguous.
        return datetime.now(timezone.utc).isoformat()

    @staticmethod
    def _scalar(value):
        """Unwrap a list-wrapped field to its first element (None if empty)."""
        if isinstance(value, list):
            return value[0] if value else None
        return value

    @classmethod
    def _resource_iri(cls, resource: str) -> str:
        """
        Turn a resource name or URL into an IRI that is safe inside ``<...>``.

        Bare names are prefixed with ``http://dbpedia.org/resource/`` and have
        spaces replaced by underscores; values that already start with
        ``http://`` or ``https://`` are used as given.  Any character that
        would terminate or invalidate the IRI reference is percent-encoded so
        the caller's text can never escape into the surrounding query.
        """
        cleaned = resource.strip()
        if cleaned.startswith(("http://", "https://")):
            iri = cleaned
        else:
            iri = f"http://dbpedia.org/resource/{cleaned.replace(' ', '_')}"

        return "".join(
            f"%{ord(ch):02X}" if ch in cls._IRI_FORBIDDEN or ord(ch) <= 0x20 else ch
            for ch in iri
        )

    @classmethod
    def _parse_lookup_results(cls, data: Dict) -> List[Dict]:
        """
        Normalize a Lookup API JSON payload into a list of result dicts.

        Two payload shapes are accepted: a ``results`` array with
        ``uri`` / ``description`` / ``classes`` / ``categories`` keys, and a
        ``docs`` array with list-wrapped ``resource`` / ``comment`` /
        ``typeName`` / ``category`` keys.
        NOTE(review): the ``docs`` shape follows the hosted Lookup service's
        documented JSON output - confirm against the deployed version.
        """
        items = data.get("results") or data.get("docs") or []
        fetched_at = cls._utc_timestamp()

        parsed = []
        for item in items:
            ref_count = cls._scalar(item.get("refCount", 0))
            if isinstance(ref_count, str) and ref_count.isdigit():
                ref_count = int(ref_count)

            parsed.append({
                "label": cls._scalar(item.get("label")),
                "uri": cls._scalar(item.get("uri") or item.get("resource")),
                "description": cls._scalar(
                    item.get("description") or item.get("comment")
                ),
                "classes": (
                    item.get("classes")
                    or item.get("typeName")
                    or item.get("type")
                    or []
                ),
                "categories": item.get("categories") or item.get("category") or [],
                "refCount": ref_count,  # How many Wikipedia pages link to this
                "source": "dbpedia",
                "fetched_at": fetched_at,
            })
        return parsed

    @classmethod
    def _parse_bindings(cls, data: Dict, resource_url: str) -> Dict:
        """
        Group SPARQL ``?property ?value`` bindings by short property name.

        The short name is the part of the property URI after its last ``/``
        and last ``#``; values of properties sharing a short name are
        collected into one list, in response order.
        """
        properties: Dict[str, list] = {}
        for binding in data.get("results", {}).get("bindings", []):
            prop = binding.get("property", {}).get("value", "")
            value = binding.get("value", {}).get("value", "")

            # Extract property name from URI
            prop_name = prop.split("/")[-1].split("#")[-1]
            properties.setdefault(prop_name, []).append(value)

        return {
            "resource": resource_url,
            "properties": properties,
            "source": "dbpedia",
            "fetched_at": cls._utc_timestamp(),
        }

    # ------------------------------------------------------------------
    # Network methods
    # ------------------------------------------------------------------

    async def search(
        self,
        query: str,
        max_results: int = 10,
        type_filter: Optional[str] = None
    ) -> List[Dict]:
        """
        Search DBpedia (autocomplete/type-ahead).

        Args:
            query: Search query (e.g., "Tuscaloosa", "School Board")
            max_results: Maximum number of results
            type_filter: Filter by type (e.g., "Place", "Person", "Organisation")

        Returns:
            List of result dicts with keys ``label``, ``uri``, ``description``,
            ``classes``, ``categories``, ``refCount``, ``source`` and
            ``fetched_at``.  An empty or whitespace-only query returns ``[]``
            without contacting the service.

        Raises:
            Exception: Whatever the HTTP request, status check or JSON
                decoding raises; it is logged first, then re-raised.
        """
        # A blank query has nothing to complete; skip the network round-trip.
        if not query or not query.strip():
            return []

        logger.info(f"Searching DBpedia for: {query}")

        params = {
            "query": query,
            "maxResults": max_results,
            "format": "json"
        }

        if type_filter:
            # NOTE(review): the hosted Lookup service may expect "typeName"
            # rather than "type" for class filtering - confirm with its docs.
            params["type"] = type_filter

        async with httpx.AsyncClient(timeout=30.0) as client:
            try:
                response = await client.get(
                    self.LOOKUP_API,
                    params=params,
                    headers={
                        "User-Agent": "CivicEngagementBot/1.0 (Educational Research)",
                        "Accept": "application/json"
                    }
                )
                response.raise_for_status()
                results = self._parse_lookup_results(response.json())

                logger.info(f"✅ Found {len(results)} results for '{query}'")
                return results

            except Exception as e:
                logger.error(f"Error searching DBpedia: {e}")
                raise

    async def search_by_type(
        self,
        query: str,
        type_filter: str,
        max_results: int = 20
    ) -> List[Dict]:
        """
        Search for specific entity types.

        Thin wrapper around ``search`` that makes the type filter mandatory.

        Args:
            query: Search query
            type_filter: Entity type ("Place", "Person", "Organisation", etc.)
            max_results: Maximum results

        Returns:
            Filtered results of that type
        """
        logger.info(f"Searching for {type_filter}: {query}")

        return await self.search(
            query=query,
            max_results=max_results,
            type_filter=type_filter
        )

    async def get_resource_info(self, resource: str, limit: int = 100) -> Dict:
        """
        Get detailed information about a DBpedia resource.

        Args:
            resource: Resource name (e.g., "Tuscaloosa,_Alabama") or a full
                ``http(s)://`` resource URL.
            limit: Maximum number of property/value rows requested from the
                SPARQL endpoint (defaults to the previous fixed value, 100).

        Returns:
            Dict with ``resource`` (the IRI queried), ``properties`` (short
            property name -> list of values), ``source`` and ``fetched_at``.

        Raises:
            Exception: Whatever the HTTP request, status check or JSON
                decoding raises; it is logged first, then re-raised.
        """
        # Escaped so caller-supplied text cannot break out of the <...> below.
        resource_url = self._resource_iri(resource)

        logger.info(f"Fetching resource info: {resource_url}")

        # int() keeps the LIMIT clause numeric whatever the caller passes.
        row_limit = max(0, int(limit))

        # Query SPARQL endpoint for all properties
        query = f"""
        SELECT ?property ?value
        WHERE {{
          <{resource_url}> ?property ?value .
        }}
        LIMIT {row_limit}
        """

        async with httpx.AsyncClient(timeout=30.0) as client:
            try:
                response = await client.get(
                    self.SPARQL_ENDPOINT,
                    params={
                        "query": query,
                        "format": "json"
                    },
                    headers={
                        "User-Agent": "CivicEngagementBot/1.0",
                        "Accept": "application/sparql-results+json"
                    }
                )
                response.raise_for_status()
                info = self._parse_bindings(response.json(), resource_url)

                logger.info(f"✅ Found {len(info['properties'])} properties for {resource}")
                return info

            except Exception as e:
                logger.error(f"Error fetching resource info: {e}")
                raise

    async def find_cities(self, state: Optional[str] = None) -> List[Dict]:
        """
        Find cities (with optional state filter).

        Args:
            state: State name to filter by; when omitted the generic keyword
                "city" is searched instead.

        Returns:
            List of city dicts (up to 50, type "City")
        """
        if state:
            query = f"cities in {state}"
        else:
            query = "city"

        return await self.search_by_type(
            query=query,
            type_filter="City",
            max_results=50
        )

    async def find_people(self, name_query: str) -> List[Dict]:
        """
        Find people by name.

        Args:
            name_query: Name or partial name

        Returns:
            List of person dicts (up to 20, type "Person")
        """
        return await self.search_by_type(
            query=name_query,
            type_filter="Person",
            max_results=20
        )

    async def find_organizations(self, org_query: str) -> List[Dict]:
        """
        Find organizations.

        Args:
            org_query: Organization name or keyword

        Returns:
            List of organization dicts (up to 20, type "Organisation")
        """
        return await self.search_by_type(
            query=org_query,
            type_filter="Organisation",
            max_results=20
        )

    def save_to_json(self, data, filename: str):
        """
        Save data to JSON cache.

        Args:
            data: JSON-serializable object to write.
            filename: File name, resolved relative to ``self.cache_dir``.
        """
        import json

        filepath = self.cache_dir / filename
        # Explicit encoding so the file does not depend on the platform default.
        with open(filepath, 'w', encoding="utf-8") as f:
            json.dump(data, f, indent=2)

        logger.info(f"💾 Saved data to {filepath}")


# ============================================================================
# Example Usage
# ============================================================================

async def example_usage():
    """Run four demo queries against DBpedia, printing and caching the output.

    The demos are independent of one another: each sits in its own ``try``
    block, so a failure is logged and the following demo still runs.  Search
    results are written to the JSON cache only when non-empty; the resource
    info from demo 2 is always written.
    """
    client = DBpediaLookup()
    rule = "=" * 80

    # --- Demo 1: keyword (type-ahead) search ---------------------------------
    for line in ("\n" + rule, "Example 1: Autocomplete search for 'Tuscaloosa'", rule):
        logger.info(line)

    try:
        hits = await client.search("Tuscaloosa", max_results=10)

        print(f"\n✅ Found {len(hits)} results:")
        for hit in hits:
            print(f"\n   • {hit['label']}")
            summary = hit.get('description')
            if summary:
                print(f"     {summary}")
            print(f"     URI: {hit['uri']}")
            print(f"     Reference count: {hit['refCount']}")

        if hits:
            client.save_to_json(hits, "tuscaloosa_search.json")

    except Exception as err:
        logger.error(f"Error: {err}")

    # --- Demo 2: all properties of a single resource -------------------------
    for line in ("\n" + rule, "Example 2: Get detailed info about Tuscaloosa, Alabama", rule):
        logger.info(line)

    try:
        details = await client.get_resource_info("Tuscaloosa,_Alabama")
        props = details['properties']

        print(f"\n✅ Found {len(props)} properties:")

        # Only a handful of well-known properties are echoed to the console.
        highlighted = (
            "mayor", "population", "areaCode", "postalCode",
            "website", "leaderTitle", "foundingDate",
        )
        for key in highlighted:
            if key not in props:
                continue
            print(f"   • {key}: {props[key]}")

        client.save_to_json(details, "tuscaloosa_info.json")

    except Exception as err:
        logger.error(f"Error: {err}")

    # --- Demo 3: cities filtered by state ------------------------------------
    for line in ("\n" + rule, "Example 3: Search for cities in Alabama", rule):
        logger.info(line)

    try:
        towns = await client.find_cities(state="Alabama")

        print(f"\n✅ Found {len(towns)} cities in Alabama:")
        for town in towns[:10]:  # first ten only
            print(f"   • {town['label']}")
            blurb = town.get('description')
            if blurb:
                print(f"     {blurb}")

        if towns:
            client.save_to_json(towns, "alabama_cities.json")

    except Exception as err:
        logger.error(f"Error: {err}")

    # --- Demo 4: people search ------------------------------------------------
    for line in ("\n" + rule, "Example 4: Search for Alabama politicians", rule):
        logger.info(line)

    try:
        persons = await client.find_people("Alabama mayor")

        print(f"\n✅ Found {len(persons)} people:")
        for entry in persons[:10]:
            print(f"   • {entry['label']}")
            blurb = entry.get('description')
            if blurb:
                print(f"     {blurb}")

        if persons:
            client.save_to_json(persons, "alabama_politicians.json")

    except Exception as err:
        logger.error(f"Error: {err}")

    # Closing banner.
    closing_lines = (
        "\n✅ Examples complete!",
        "\n" + rule,
        "DBpedia Lookup API is perfect for autocomplete!",
        "Use it in your search box for instant suggestions.",
        rule,
    )
    for line in closing_lines:
        logger.info(line)


# Script entry point: when the file is executed directly (not imported),
# run the demo coroutine to completion with asyncio.run().
if __name__ == "__main__":
    asyncio.run(example_usage())