""" Lancer Deep Research Service Integrates with Lancer Search API for AI-powered research """ import httpx from typing import Optional, List, Dict, Any from dataclasses import dataclass LANCER_BASE_URL = "https://madras1-lancer.hf.space" @dataclass class SearchResult: """Individual search result from Lancer""" title: str url: str content: str score: float published_date: Optional[str] = None @dataclass class ResearchResponse: """Response from Lancer research/search""" query: str answer: Optional[str] results: List[SearchResult] citations: List[Dict[str, Any]] processing_time_ms: float raw_text: str # Combined text for NER extraction async def search( query: str, max_results: int = 10, freshness: str = "any" ) -> ResearchResponse: """ Perform a search with AI synthesis using Lancer API. """ try: async with httpx.AsyncClient(timeout=60.0) as client: response = await client.post( f"{LANCER_BASE_URL}/api/v1/search", json={ "query": query, "max_results": max_results, "freshness": freshness, "include_answer": True } ) if response.status_code != 200: raise Exception(f"Lancer API error: {response.status_code}") data = response.json() results = [ SearchResult( title=r.get("title", ""), url=r.get("url", ""), content=r.get("content", ""), score=r.get("score", 0.0), published_date=r.get("published_date") ) for r in data.get("results", []) ] # Combine all text for NER raw_text = data.get("answer", "") or "" for r in results: raw_text += f"\n{r.title}. {r.content}" return ResearchResponse( query=data.get("query", query), answer=data.get("answer"), results=results, citations=data.get("citations", []), processing_time_ms=data.get("processing_time_ms", 0), raw_text=raw_text ) except Exception as e: raise Exception(f"Lancer search failed: {str(e)}") async def deep_research( query: str, max_dimensions: int = 5, max_sources_per_dim: int = 5 ) -> ResearchResponse: """ Perform deep multi-dimensional research using Lancer API. This provides richer, more comprehensive analysis. """ try: async with httpx.AsyncClient(timeout=120.0) as client: response = await client.post( f"{LANCER_BASE_URL}/api/v1/research/deep", json={ "query": query, "max_dimensions": max_dimensions, "max_sources_per_dim": max_sources_per_dim, "max_total_searches": 20 } ) if response.status_code != 200: raise Exception(f"Lancer API error: {response.status_code}") data = response.json() # Deep research returns a different format - adapt it results = [] raw_text = "" # Extract from dimensions if present if "dimensions" in data: for dim in data["dimensions"]: dim_name = dim.get("dimension", "") raw_text += f"\n## {dim_name}\n" for r in dim.get("results", []): results.append(SearchResult( title=r.get("title", ""), url=r.get("url", ""), content=r.get("content", ""), score=r.get("score", 0.0) )) raw_text += f"{r.get('title', '')}. {r.get('content', '')}\n" # Add final report final_report = data.get("final_report", data.get("report", "")) if final_report: raw_text = final_report + "\n" + raw_text return ResearchResponse( query=query, answer=final_report, results=results, citations=data.get("citations", []), processing_time_ms=data.get("processing_time_ms", 0), raw_text=raw_text ) except Exception as e: raise Exception(f"Lancer deep research failed: {str(e)}") async def heavy_search( query: str, max_results: int = 5 ) -> ResearchResponse: """ Heavy search with full content scraping from sources. Slower but provides more context. """ try: async with httpx.AsyncClient(timeout=90.0) as client: response = await client.post( f"{LANCER_BASE_URL}/api/v1/search/heavy", json={ "query": query, "max_results": max_results, "include_answer": True } ) if response.status_code != 200: raise Exception(f"Lancer API error: {response.status_code}") data = response.json() results = [ SearchResult( title=r.get("title", ""), url=r.get("url", ""), content=r.get("content", ""), score=r.get("score", 0.0) ) for r in data.get("results", []) ] raw_text = data.get("answer", "") or "" for r in results: raw_text += f"\n{r.title}. {r.content}" return ResearchResponse( query=query, answer=data.get("answer"), results=results, citations=data.get("citations", []), processing_time_ms=data.get("processing_time_ms", 0), raw_text=raw_text ) except Exception as e: raise Exception(f"Lancer heavy search failed: {str(e)}")