nanoindex-api / app.py
shhdwi's picture
Upload folder using huggingface_hub
1d763b8 verified
"""NanoIndex API on Hugging Face Spaces.
FastAPI server that answers questions about documents using NanoIndex.
Trees are fetched from the shhdwi/nanoindex-trees dataset on HF.
"""
import os
import shutil
from pathlib import Path
from fastapi import FastAPI, Header
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
# The ASGI application object that the route decorators in this module attach to.
app = FastAPI(title="NanoIndex API")

# Allow CORS from any origin (Vercel frontend): every origin, method and
# request header is accepted.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
# Hugging Face dataset repository the document trees are fetched from.
HF_DATASET = "shhdwi/nanoindex-trees"

# Local directory where downloaded trees are cached; created at import time
# so later code can assume it exists.
CACHE_DIR = Path("/tmp") / "nanoindex_trees"
CACHE_DIR.mkdir(exist_ok=True, parents=True)
def _install_atomically(source, destination: Path) -> None:
    """Copy *source* to *destination* without ever exposing a partial file."""
    import tempfile

    # Stage inside the destination directory: os.replace can only swap the
    # file in atomically when both paths live on the same filesystem.
    fd, staging = tempfile.mkstemp(dir=destination.parent, suffix=".tmp")
    os.close(fd)
    try:
        shutil.copy2(source, staging)
        os.replace(staging, destination)
    except BaseException:
        # Never leave stray staging files behind in the cache directory.
        Path(staging).unlink(missing_ok=True)
        raise


def get_tree_path(doc_name: str) -> Path:
    """Return the local path of the tree JSON for *doc_name*.

    The name is reduced to alphanumerics plus ``_``, ``-`` and ``.`` and looked
    up in ``CACHE_DIR``.  On a cache miss the file is downloaded from the
    ``HF_DATASET`` dataset, trying each known prefix in turn, and copied into
    the cache.  ``HF_TOKEN`` from the environment is used when it is set.

    Args:
        doc_name: Document name as supplied by the caller.

    Returns:
        Path of the cached ``<name>.json`` file.

    Raises:
        FileNotFoundError: If the sanitized name is empty, or if the tree could
            not be obtained under any prefix.  The last underlying error is
            attached as ``__cause__`` so the real reason is not lost.
    """
    # Path separators are dropped here, so the name cannot escape CACHE_DIR.
    safe = "".join(c for c in doc_name if c.isalnum() or c in "_-.")
    if not safe:
        # Nothing usable is left: skip the cache lookup and the hub downloads.
        raise FileNotFoundError(f"Tree not found: {doc_name}")

    local = CACHE_DIR / f"{safe}.json"
    if local.exists():
        return local

    from huggingface_hub import hf_hub_download

    token = os.environ.get("HF_TOKEN", "")
    last_error = None
    for prefix in ("financebench/trees", "mmlongbench/trees"):
        try:
            downloaded = hf_hub_download(
                repo_id=HF_DATASET,
                filename=f"{prefix}/{safe}.json",
                repo_type="dataset",
                token=token or None,
            )
            _install_atomically(downloaded, local)
            return local
        except Exception as exc:
            # Best effort: a failure under one prefix must not prevent the
            # next one from being tried.  Remember it for diagnostics.
            last_error = exc
    raise FileNotFoundError(f"Tree not found: {doc_name}") from last_error
# Process-wide NanoIndex instance; stays None until get_ni() first builds it.
_ni_instance = None


def get_ni():
    """Return the shared NanoIndex instance, building it on first use.

    Constructor arguments are derived from the environment:

    * ``NANONETS_API_KEY`` is forwarded as ``nanonets_api_key`` when non-empty.
    * ``llm`` is set to the model paired with the first of
      ``ANTHROPIC_API_KEY``, ``OPENAI_API_KEY`` and ``GOOGLE_API_KEY`` that is
      set; when none is set, ``llm`` is not passed at all.
    """
    global _ni_instance
    if _ni_instance is None:
        from nanoindex import NanoIndex

        options = {}
        if nanonets_key := os.environ.get("NANONETS_API_KEY", ""):
            options["nanonets_api_key"] = nanonets_key

        # Listed in priority order: the first provider with a key wins.
        providers = (
            ("ANTHROPIC_API_KEY", "anthropic:claude-sonnet-4-6"),
            ("OPENAI_API_KEY", "openai:gpt-5.4"),
            ("GOOGLE_API_KEY", "google:gemini-2.5-flash"),
        )
        chosen = next(
            (model for env_key, model in providers if os.environ.get(env_key)),
            None,
        )
        if chosen is not None:
            options["llm"] = chosen

        _ni_instance = NanoIndex(**options)
    return _ni_instance
# Request body for POST /ask.
# (Documented with comments rather than a class docstring so the generated
# schema description is left exactly as it was.)
class AskRequest(BaseModel):
    # The question to answer.
    question: str
    # Name of the document whose tree should be queried.
    doc_name: str
    # Query mode handed through to NanoIndex; "fast" when omitted.
    mode: str = "fast"
# One source reference attached to an answer.
# (Documented with comments rather than a class docstring so the generated
# schema description is left exactly as it was.)
class Citation(BaseModel):
    # Identifier of the cited tree node.
    node_id: str
    # Title of the cited node.
    title: str
    # Page numbers associated with the node; empty when none are given.
    pages: list[int] = []
# Response body for POST /ask.
# (Documented with comments rather than a class docstring so the generated
# schema description is left exactly as it was.)
class AskResponse(BaseModel):
    # Answer text; the handler leaves it empty when it reports an error.
    content: str
    # Mode reported for the answer; empty when an error is reported.
    mode: str
    # Sources backing the answer; empty by default.
    citations: list[Citation] = []
    # Human-readable failure description, or None on success.
    error: str | None = None
# Handler contract (kept in comments so the docstring that FastAPI publishes
# in the API docs stays unchanged):
#   * When NANOINDEX_SPACE_KEY is set, the Authorization header must equal
#     "Bearer <key>"; when it is unset, no authentication is performed.
#   * Failures are never raised to the client. They are reported through the
#     `error` field of an AskResponse whose `content` and `mode` are empty.
@app.post("/ask", response_model=AskResponse)
def ask(req: AskRequest, authorization: str | None = Header(default=None)):
    """Answer a question about a document."""
    import hmac

    # Simple API key check - set NANOINDEX_SPACE_KEY in HF secrets and Vercel env
    space_key = os.environ.get("NANOINDEX_SPACE_KEY", "")
    if space_key:
        # Compare as bytes in constant time so response timing does not leak
        # how much of the secret a guess got right.  "surrogatepass" makes the
        # encoding total, so no header or env value can make encode() raise.
        expected = f"Bearer {space_key}".encode("utf-8", "surrogatepass")
        supplied = (authorization or "").encode("utf-8", "surrogatepass")
        if not hmac.compare_digest(supplied, expected):
            return AskResponse(content="", mode="", error="Unauthorized")

    from nanoindex.utils.tree_ops import load_tree

    # Stage 1: locate (downloading if necessary) and parse the document tree.
    try:
        tree_path = get_tree_path(req.doc_name)
        tree = load_tree(tree_path)
    except FileNotFoundError as e:
        return AskResponse(content="", mode="", error=str(e))
    except Exception as e:
        return AskResponse(content="", mode="", error=f"Failed to load tree: {e}")

    # Stage 2: run the query against the shared NanoIndex instance.
    try:
        ni = get_ni()
        answer = ni.ask(req.question, tree, mode=req.mode)
    except Exception as e:
        return AskResponse(content="", mode="", error=f"Query failed: {e}")

    return AskResponse(
        content=answer.content,
        mode=answer.mode,
        citations=[
            Citation(node_id=c.node_id, title=c.title, pages=c.pages)
            for c in answer.citations
        ],
    )
@app.get("/health")
def health():
    # Liveness probe: returns a fixed payload and touches no other state.
    return dict(status="ok", service="nanoindex-api")
@app.get("/")
def root():
    # Landing payload: names the service and points at the docs and /ask.
    usage_hint = "POST /ask with {question, doc_name, mode}"
    return {"service": "NanoIndex API", "docs": "/docs", "usage": usage_hint}