"""NanoIndex API on Hugging Face Spaces. FastAPI server that answers questions about documents using NanoIndex. Trees are fetched from the shhdwi/nanoindex-trees dataset on HF. """ import os import shutil from pathlib import Path from fastapi import FastAPI, Header from fastapi.middleware.cors import CORSMiddleware from pydantic import BaseModel app = FastAPI(title="NanoIndex API") # Allow CORS from any origin (Vercel frontend) app.add_middleware( CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"], ) HF_DATASET = "shhdwi/nanoindex-trees" CACHE_DIR = Path("/tmp/nanoindex_trees") CACHE_DIR.mkdir(parents=True, exist_ok=True) def get_tree_path(doc_name: str) -> Path: """Download tree from HF dataset if not cached.""" safe = "".join(c for c in doc_name if c.isalnum() or c in "_-.") local = CACHE_DIR / f"{safe}.json" if local.exists(): return local from huggingface_hub import hf_hub_download token = os.environ.get("HF_TOKEN", "") for prefix in ["financebench/trees", "mmlongbench/trees"]: try: downloaded = hf_hub_download( repo_id=HF_DATASET, filename=f"{prefix}/{safe}.json", repo_type="dataset", token=token or None, ) shutil.copy2(downloaded, local) return local except Exception: continue raise FileNotFoundError(f"Tree not found: {doc_name}") # Cache NanoIndex instance _ni_instance = None def get_ni(): global _ni_instance if _ni_instance is not None: return _ni_instance from nanoindex import NanoIndex kwargs = {} nanonets_key = os.environ.get("NANONETS_API_KEY", "") if nanonets_key: kwargs["nanonets_api_key"] = nanonets_key for env_key, model in [ ("ANTHROPIC_API_KEY", "anthropic:claude-sonnet-4-6"), ("OPENAI_API_KEY", "openai:gpt-5.4"), ("GOOGLE_API_KEY", "google:gemini-2.5-flash"), ]: if os.environ.get(env_key): kwargs["llm"] = model break _ni_instance = NanoIndex(**kwargs) return _ni_instance class AskRequest(BaseModel): question: str doc_name: str mode: str = "fast" class Citation(BaseModel): node_id: str title: str pages: list[int] = [] 
class AskResponse(BaseModel):
    """Response body for ``POST /ask``.

    On failure ``content`` and ``mode`` are empty strings and ``error`` holds
    a message; on success ``error`` is ``None``.
    """

    content: str
    mode: str
    citations: list[Citation] = []
    error: str | None = None


@app.post("/ask", response_model=AskResponse)
def ask(req: AskRequest, authorization: str | None = Header(default=None)):
    """Answer a question about a document.

    Args:
        req: The question, the document name and the query mode.
        authorization: Value of the ``Authorization`` header, if any. It is
            only checked when ``NANOINDEX_SPACE_KEY`` is set, in which case
            it must equal ``Bearer <key>``.

    Returns:
        An ``AskResponse``. Failures (bad credentials, missing tree, query
        error) are reported through the ``error`` field rather than by
        raising, so the handler always returns a response body.
    """
    # Simple API key check - set NANOINDEX_SPACE_KEY in HF secrets and Vercel env
    space_key = os.environ.get("NANOINDEX_SPACE_KEY", "")
    if space_key:
        import hmac

        # Compare in constant time so response timing does not reveal how
        # much of a guessed token is correct. Bytes are used because
        # compare_digest only accepts ASCII-only str values.
        expected = f"Bearer {space_key}".encode("utf-8", "surrogateescape")
        supplied = (authorization or "").encode("utf-8", "surrogateescape")
        if not hmac.compare_digest(supplied, expected):
            return AskResponse(content="", mode="", error="Unauthorized")

    from nanoindex.utils.tree_ops import load_tree

    try:
        tree_path = get_tree_path(req.doc_name)
        tree = load_tree(tree_path)
    except FileNotFoundError as e:
        return AskResponse(content="", mode="", error=str(e))
    except Exception as e:
        return AskResponse(content="", mode="", error=f"Failed to load tree: {e}")

    try:
        ni = get_ni()
        answer = ni.ask(req.question, tree, mode=req.mode)
    except Exception as e:
        return AskResponse(content="", mode="", error=f"Query failed: {e}")

    return AskResponse(
        content=answer.content,
        mode=answer.mode,
        citations=[
            Citation(node_id=c.node_id, title=c.title, pages=c.pages)
            for c in answer.citations
        ],
    )


@app.get("/health")
def health():
    """Return a static payload indicating the service is up."""
    return {"status": "ok", "service": "nanoindex-api"}


@app.get("/")
def root():
    """Return a short description of the service and how to call it."""
    return {
        "service": "NanoIndex API",
        "docs": "/docs",
        "usage": "POST /ask with {question, doc_name, mode}",
    }