# Numidium/app/services/lancer.py
# Madras1's picture
# Upload 49 files
# ce92c6b verified
"""
Lancer Deep Research Service
Integrates with Lancer Search API for AI-powered research
"""
import httpx
from typing import Optional, List, Dict, Any
from dataclasses import dataclass
# Base URL of the Lancer API; the functions in this module append endpoint
# paths (e.g. "/api/v1/search") to it when building request URLs.
LANCER_BASE_URL = "https://madras1-lancer.hf.space"
@dataclass
class SearchResult:
    """
    One search hit returned by Lancer.

    Instances are built from the entries of a Lancer response's result
    list; only ``published_date`` is optional.
    """
    # Title of the hit.
    title: str
    # URL of the hit.
    url: str
    # Text content supplied for the hit.
    content: str
    # Score reported for the hit.
    score: float
    # Publication date as a string, or None when not provided.
    published_date: Optional[str] = None
@dataclass
class ResearchResponse:
    """
    Normalized result of a Lancer search or research call.

    All fields are required; the service functions in this module fill
    them from the JSON payload returned by the API.
    """
    # Query associated with this response.
    query: str
    # Synthesized answer / report text, or None when absent.
    answer: Optional[str]
    # Individual search hits.
    results: List[SearchResult]
    # Citation entries as provided by the API.
    citations: List[Dict[str, Any]]
    # Processing time in milliseconds.
    processing_time_ms: float
    # Combined text for NER extraction.
    raw_text: str
async def search(
    query: str,
    max_results: int = 10,
    freshness: str = "any"
) -> ResearchResponse:
    """
    Perform a search with AI synthesis using Lancer API.

    Args:
        query: Search query sent to Lancer.
        max_results: Sent as ``max_results`` in the request body.
        freshness: Sent as ``freshness`` in the request body.

    Returns:
        A ResearchResponse built from the JSON payload. ``raw_text`` is the
        answer followed by one "title. content" line per result.

    Raises:
        Exception: On any failure (transport error, non-200 status, or a
            payload that cannot be parsed). The message is prefixed with
            "Lancer search failed: " and the original error is chained as
            the cause.
    """
    try:
        async with httpx.AsyncClient(timeout=60.0) as client:
            response = await client.post(
                f"{LANCER_BASE_URL}/api/v1/search",
                json={
                    "query": query,
                    "max_results": max_results,
                    "freshness": freshness,
                    "include_answer": True,
                },
            )

        if response.status_code != 200:
            raise Exception(f"Lancer API error: {response.status_code}")

        data = response.json()

        # `or` fallbacks also cover keys that are present but JSON null, so
        # a null list does not break iteration and a null title/content does
        # not end up as the literal text "None" in raw_text.
        results = [
            SearchResult(
                title=r.get("title") or "",
                url=r.get("url") or "",
                content=r.get("content") or "",
                score=r.get("score") or 0.0,
                published_date=r.get("published_date"),
            )
            for r in data.get("results") or []
        ]

        answer = data.get("answer")

        # Combine all text for NER: answer first, then one line per result.
        raw_text = "\n".join(
            [answer or "", *(f"{r.title}. {r.content}" for r in results)]
        )

        return ResearchResponse(
            query=data.get("query") or query,
            answer=answer,
            results=results,
            citations=data.get("citations") or [],
            processing_time_ms=data.get("processing_time_ms") or 0,
            raw_text=raw_text,
        )
    except Exception as e:
        # Chain explicitly so the underlying error stays attached as the cause.
        raise Exception(f"Lancer search failed: {e}") from e
async def deep_research(
    query: str,
    max_dimensions: int = 5,
    max_sources_per_dim: int = 5,
    max_total_searches: int = 20
) -> ResearchResponse:
    """
    Perform deep multi-dimensional research using Lancer API.
    This provides richer, more comprehensive analysis.

    Args:
        query: Research query sent to Lancer.
        max_dimensions: Sent as ``max_dimensions`` in the request body.
        max_sources_per_dim: Sent as ``max_sources_per_dim`` in the
            request body.
        max_total_searches: Sent as ``max_total_searches`` in the request
            body (previously fixed at 20).

    Returns:
        A ResearchResponse whose ``results`` are the hits of all dimensions
        flattened into one list. ``answer`` is the final report (empty
        string when the payload has none). ``raw_text`` is the report
        followed by a "## <dimension>" section per dimension listing its
        hits as "title. content" lines.

    Raises:
        Exception: On any failure (transport error, non-200 status, or a
            payload that cannot be parsed). The message is prefixed with
            "Lancer deep research failed: " and the original error is
            chained as the cause.
    """
    try:
        async with httpx.AsyncClient(timeout=120.0) as client:
            response = await client.post(
                f"{LANCER_BASE_URL}/api/v1/research/deep",
                json={
                    "query": query,
                    "max_dimensions": max_dimensions,
                    "max_sources_per_dim": max_sources_per_dim,
                    "max_total_searches": max_total_searches,
                },
            )

        if response.status_code != 200:
            raise Exception(f"Lancer API error: {response.status_code}")

        data = response.json()

        # Deep research returns a different format - adapt it.
        results = []
        text_parts = []

        # `or []` / `or ""` also cover keys that are present but JSON null.
        for dim in data.get("dimensions") or []:
            dim_name = dim.get("dimension") or ""
            text_parts.append(f"\n## {dim_name}\n")
            for r in dim.get("results") or []:
                item = SearchResult(
                    title=r.get("title") or "",
                    url=r.get("url") or "",
                    content=r.get("content") or "",
                    score=r.get("score") or 0.0,
                    published_date=r.get("published_date"),
                )
                results.append(item)
                text_parts.append(f"{item.title}. {item.content}\n")

        raw_text = "".join(text_parts)

        # Prefer "final_report"; fall back to "report" when it is missing,
        # null or empty.
        final_report = data.get("final_report") or data.get("report") or ""
        if final_report:
            raw_text = f"{final_report}\n{raw_text}"

        return ResearchResponse(
            query=query,
            answer=final_report,
            results=results,
            citations=data.get("citations") or [],
            processing_time_ms=data.get("processing_time_ms") or 0,
            raw_text=raw_text,
        )
    except Exception as e:
        # Chain explicitly so the underlying error stays attached as the cause.
        raise Exception(f"Lancer deep research failed: {e}") from e
async def heavy_search(
    query: str,
    max_results: int = 5
) -> ResearchResponse:
    """
    Heavy search with full content scraping from sources.
    Slower but provides more context.

    Args:
        query: Search query sent to Lancer.
        max_results: Sent as ``max_results`` in the request body.

    Returns:
        A ResearchResponse built from the JSON payload. ``raw_text`` is the
        answer followed by one "title. content" line per result.

    Raises:
        Exception: On any failure (transport error, non-200 status, or a
            payload that cannot be parsed). The message is prefixed with
            "Lancer heavy search failed: " and the original error is
            chained as the cause.
    """
    try:
        async with httpx.AsyncClient(timeout=90.0) as client:
            response = await client.post(
                f"{LANCER_BASE_URL}/api/v1/search/heavy",
                json={
                    "query": query,
                    "max_results": max_results,
                    "include_answer": True,
                },
            )

        if response.status_code != 200:
            raise Exception(f"Lancer API error: {response.status_code}")

        data = response.json()

        # `or` fallbacks also cover keys that are present but JSON null, so
        # a null list does not break iteration and a null title/content does
        # not end up as the literal text "None" in raw_text.
        results = [
            SearchResult(
                title=r.get("title") or "",
                url=r.get("url") or "",
                content=r.get("content") or "",
                score=r.get("score") or 0.0,
                published_date=r.get("published_date"),
            )
            for r in data.get("results") or []
        ]

        answer = data.get("answer")

        # Combine all text for NER: answer first, then one line per result.
        raw_text = "\n".join(
            [answer or "", *(f"{r.title}. {r.content}" for r in results)]
        )

        return ResearchResponse(
            query=query,
            answer=answer,
            results=results,
            citations=data.get("citations") or [],
            processing_time_ms=data.get("processing_time_ms") or 0,
            raw_text=raw_text,
        )
    except Exception as e:
        # Chain explicitly so the underlying error stays attached as the cause.
        raise Exception(f"Lancer heavy search failed: {e}") from e