"""
Lancer Deep Research Service
Integrates with Lancer Search API for AI-powered research
"""
import httpx
from typing import Optional, List, Dict, Any
from dataclasses import dataclass
LANCER_BASE_URL = "https://madras1-lancer.hf.space"
@dataclass
class SearchResult:
    """Individual search result from Lancer"""
    title: str                            # result title (empty string when the API omits it)
    url: str                              # source URL
    content: str                          # snippet or page content returned for this result
    score: float                          # relevance score as reported by Lancer (0.0 when absent)
    published_date: Optional[str] = None  # publication date string, if the API reports one
@dataclass
class ResearchResponse:
    """Response from Lancer research/search"""
    query: str                       # query echoed by the API (falls back to the caller's query)
    answer: Optional[str]            # AI-synthesized answer/report, or None when not provided
    results: List[SearchResult]      # individual source results
    citations: List[Dict[str, Any]]  # raw citation dicts passed through from the API
    processing_time_ms: float        # server-reported processing time (0 when absent)
    raw_text: str  # Combined text for NER extraction
async def search(
    query: str,
    max_results: int = 10,
    freshness: str = "any"
) -> ResearchResponse:
    """
    Perform a search with AI synthesis using Lancer API.

    Args:
        query: Free-text search query.
        max_results: Maximum number of results to request.
        freshness: Recency filter forwarded to the API (default "any").

    Returns:
        ResearchResponse with the synthesized answer, per-source results,
        citations, and combined raw text for NER extraction.

    Raises:
        Exception: Wrapped as "Lancer search failed: ..." for any HTTP,
            non-200 status, or response-parsing error.
    """
    try:
        async with httpx.AsyncClient(timeout=60.0) as client:
            response = await client.post(
                f"{LANCER_BASE_URL}/api/v1/search",
                json={
                    "query": query,
                    "max_results": max_results,
                    "freshness": freshness,
                    "include_answer": True
                }
            )
        if response.status_code != 200:
            raise Exception(f"Lancer API error: {response.status_code}")
        data = response.json()
        results = [
            SearchResult(
                title=r.get("title", ""),
                url=r.get("url", ""),
                content=r.get("content", ""),
                score=r.get("score", 0.0),
                published_date=r.get("published_date")
            )
            for r in data.get("results", [])
        ]
        # Combine answer + result snippets into one text for NER.
        # Built via join rather than repeated += (avoids quadratic copies).
        pieces = [data.get("answer", "") or ""]
        pieces.extend(f"\n{r.title}. {r.content}" for r in results)
        return ResearchResponse(
            query=data.get("query", query),
            answer=data.get("answer"),
            results=results,
            citations=data.get("citations", []),
            processing_time_ms=data.get("processing_time_ms", 0),
            raw_text="".join(pieces)
        )
    except Exception as e:
        # Chain the cause so the original traceback is preserved.
        raise Exception(f"Lancer search failed: {str(e)}") from e
async def deep_research(
    query: str,
    max_dimensions: int = 5,
    max_sources_per_dim: int = 5
) -> ResearchResponse:
    """
    Perform deep multi-dimensional research using Lancer API.
    This provides richer, more comprehensive analysis.

    Args:
        query: Free-text research question.
        max_dimensions: Maximum number of research dimensions to explore.
        max_sources_per_dim: Maximum sources fetched per dimension.

    Returns:
        ResearchResponse with the final report as ``answer``, all
        per-dimension results flattened into ``results``, and a combined
        ``raw_text`` (report first, then dimension sections) for NER.

    Raises:
        Exception: Wrapped as "Lancer deep research failed: ..." for any
            HTTP, non-200 status, or response-parsing error.
    """
    try:
        async with httpx.AsyncClient(timeout=120.0) as client:
            response = await client.post(
                f"{LANCER_BASE_URL}/api/v1/research/deep",
                json={
                    "query": query,
                    "max_dimensions": max_dimensions,
                    "max_sources_per_dim": max_sources_per_dim,
                    "max_total_searches": 20
                }
            )
        if response.status_code != 200:
            raise Exception(f"Lancer API error: {response.status_code}")
        data = response.json()
        # Deep research returns a per-dimension format - flatten it into
        # the common ResearchResponse shape.
        results = []
        text_parts = []
        for dim in data.get("dimensions", []):
            text_parts.append(f"\n## {dim.get('dimension', '')}\n")
            for r in dim.get("results", []):
                results.append(SearchResult(
                    title=r.get("title", ""),
                    url=r.get("url", ""),
                    content=r.get("content", ""),
                    score=r.get("score", 0.0)
                ))
                text_parts.append(f"{r.get('title', '')}. {r.get('content', '')}\n")
        raw_text = "".join(text_parts)
        # Prepend the synthesized report when present; the field name
        # differs between API versions ("final_report" vs "report").
        final_report = data.get("final_report", data.get("report", ""))
        if final_report:
            raw_text = f"{final_report}\n{raw_text}"
        return ResearchResponse(
            query=query,
            answer=final_report,
            results=results,
            citations=data.get("citations", []),
            processing_time_ms=data.get("processing_time_ms", 0),
            raw_text=raw_text
        )
    except Exception as e:
        # Chain the cause so the original traceback is preserved.
        raise Exception(f"Lancer deep research failed: {str(e)}") from e
async def heavy_search(
    query: str,
    max_results: int = 5
) -> ResearchResponse:
    """
    Heavy search with full content scraping from sources.
    Slower but provides more context.

    Args:
        query: Free-text search query.
        max_results: Maximum number of fully-scraped results to request.

    Returns:
        ResearchResponse with the synthesized answer, scraped results,
        citations, and combined raw text for NER extraction.

    Raises:
        Exception: Wrapped as "Lancer heavy search failed: ..." for any
            HTTP, non-200 status, or response-parsing error.
    """
    try:
        async with httpx.AsyncClient(timeout=90.0) as client:
            response = await client.post(
                f"{LANCER_BASE_URL}/api/v1/search/heavy",
                json={
                    "query": query,
                    "max_results": max_results,
                    "include_answer": True
                }
            )
        if response.status_code != 200:
            raise Exception(f"Lancer API error: {response.status_code}")
        data = response.json()
        results = [
            SearchResult(
                title=r.get("title", ""),
                url=r.get("url", ""),
                content=r.get("content", ""),
                score=r.get("score", 0.0)
            )
            for r in data.get("results", [])
        ]
        # Combine answer + result snippets into one text for NER.
        # Built via join rather than repeated += (avoids quadratic copies).
        pieces = [data.get("answer", "") or ""]
        pieces.extend(f"\n{r.title}. {r.content}" for r in results)
        return ResearchResponse(
            query=query,
            answer=data.get("answer"),
            results=results,
            citations=data.get("citations", []),
            processing_time_ms=data.get("processing_time_ms", 0),
            raw_text="".join(pieces)
        )
    except Exception as e:
        # Chain the cause so the original traceback is preserved.
        raise Exception(f"Lancer heavy search failed: {str(e)}") from e