Spaces:
Runtime error
Runtime error
lazy-load RAG deps and pin frontend versions
Browse files
- backend/api.py +1 -2
- frontend/package.json +8 -8
- main.py +51 -17
backend/api.py
CHANGED
|
@@ -34,7 +34,6 @@ from pydantic import BaseModel
|
|
| 34 |
from main import (
|
| 35 |
CITATION_SYSTEM_PROMPT,
|
| 36 |
DEFAULT_WORKING_DIR,
|
| 37 |
-
QueryParam,
|
| 38 |
initialize_rag,
|
| 39 |
llm_model_func,
|
| 40 |
query_rag,
|
|
@@ -343,7 +342,7 @@ def _looks_like_no_answer(answer: str) -> bool:
|
|
| 343 |
|
| 344 |
def _dynamic_query_param(
|
| 345 |
mode: str, original: str, rewritten: str, retry_level: int
|
| 346 |
-
) -> QueryParam:
|
| 347 |
base_top_k = int(os.getenv("TOP_K", "40"))
|
| 348 |
base_chunk_top_k = int(os.getenv("CHUNK_TOP_K", "20"))
|
| 349 |
text = f"{original} {rewritten}".lower()
|
|
|
|
| 34 |
from main import (
|
| 35 |
CITATION_SYSTEM_PROMPT,
|
| 36 |
DEFAULT_WORKING_DIR,
|
|
|
|
| 37 |
initialize_rag,
|
| 38 |
llm_model_func,
|
| 39 |
query_rag,
|
|
|
|
| 342 |
|
| 343 |
def _dynamic_query_param(
|
| 344 |
mode: str, original: str, rewritten: str, retry_level: int
|
| 345 |
+
) -> "QueryParam":
|
| 346 |
base_top_k = int(os.getenv("TOP_K", "40"))
|
| 347 |
base_chunk_top_k = int(os.getenv("CHUNK_TOP_K", "20"))
|
| 348 |
text = f"{original} {rewritten}".lower()
|
frontend/package.json
CHANGED
|
@@ -8,18 +8,18 @@
|
|
| 8 |
"start": "next start"
|
| 9 |
},
|
| 10 |
"dependencies": {
|
| 11 |
-
"@xyflow/react": "
|
| 12 |
"next": "16.2.1",
|
| 13 |
"react": "19.2.4",
|
| 14 |
"react-dom": "19.2.4",
|
| 15 |
-
"react-markdown": "
|
| 16 |
},
|
| 17 |
"devDependencies": {
|
| 18 |
-
"@tailwindcss/postcss": "
|
| 19 |
-
"@types/node": "
|
| 20 |
-
"@types/react": "
|
| 21 |
-
"@types/react-dom": "
|
| 22 |
-
"tailwindcss": "
|
| 23 |
-
"typescript": "
|
| 24 |
}
|
| 25 |
}
|
|
|
|
| 8 |
"start": "next start"
|
| 9 |
},
|
| 10 |
"dependencies": {
|
| 11 |
+
"@xyflow/react": "12.10.2",
|
| 12 |
"next": "16.2.1",
|
| 13 |
"react": "19.2.4",
|
| 14 |
"react-dom": "19.2.4",
|
| 15 |
+
"react-markdown": "10.1.0"
|
| 16 |
},
|
| 17 |
"devDependencies": {
|
| 18 |
+
"@tailwindcss/postcss": "4.0.0",
|
| 19 |
+
"@types/node": "20.0.0",
|
| 20 |
+
"@types/react": "19.0.0",
|
| 21 |
+
"@types/react-dom": "19.0.0",
|
| 22 |
+
"tailwindcss": "4.0.0",
|
| 23 |
+
"typescript": "5.0.0"
|
| 24 |
}
|
| 25 |
}
|
main.py
CHANGED
|
@@ -6,7 +6,7 @@ import os
|
|
| 6 |
import re
|
| 7 |
import sys
|
| 8 |
from functools import lru_cache
|
| 9 |
-
from typing import Any
|
| 10 |
|
| 11 |
|
| 12 |
def ensure_project_venv() -> None:
|
|
@@ -30,7 +30,12 @@ ensure_project_venv()
|
|
| 30 |
import numpy as np
|
| 31 |
from datasets import load_dataset
|
| 32 |
from dotenv import load_dotenv
|
| 33 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
|
| 35 |
|
| 36 |
load_dotenv()
|
|
@@ -61,11 +66,15 @@ def configure_logging() -> None:
|
|
| 61 |
configure_logging()
|
| 62 |
|
| 63 |
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
|
|
|
|
|
|
|
|
|
| 67 |
|
| 68 |
-
# LightRAG configures its own logger during import, so apply our level again
|
|
|
|
| 69 |
configure_logging()
|
| 70 |
|
| 71 |
|
|
@@ -142,7 +151,11 @@ def configure_langfuse() -> bool:
|
|
| 142 |
|
| 143 |
|
| 144 |
@lru_cache(maxsize=1)
|
| 145 |
-
def get_embedder() -> SentenceTransformer:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 146 |
return SentenceTransformer(EMBED_MODEL)
|
| 147 |
|
| 148 |
|
|
@@ -166,6 +179,9 @@ async def llm_model_func(
|
|
| 166 |
keyword_extraction=False,
|
| 167 |
**kwargs,
|
| 168 |
) -> str:
|
|
|
|
|
|
|
|
|
|
| 169 |
api_key = os.getenv("openrouter_key")
|
| 170 |
if not api_key:
|
| 171 |
raise ValueError("Missing openrouter_key in .env")
|
|
@@ -186,7 +202,12 @@ async def llm_model_func(
|
|
| 186 |
)
|
| 187 |
|
| 188 |
|
| 189 |
-
async def initialize_rag(working_dir: str = DEFAULT_WORKING_DIR) -> LightRAG:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 190 |
os.makedirs(working_dir, exist_ok=True)
|
| 191 |
|
| 192 |
rag = LightRAG(
|
|
@@ -222,7 +243,9 @@ def load_corpus_texts(limit: int) -> list[str]:
|
|
| 222 |
return texts
|
| 223 |
|
| 224 |
|
| 225 |
-
async def ingest_corpus(
|
|
|
|
|
|
|
| 226 |
rag = None
|
| 227 |
try:
|
| 228 |
rag = await initialize_rag(working_dir)
|
|
@@ -342,7 +365,7 @@ async def query_rag(
|
|
| 342 |
rewritten_question: str,
|
| 343 |
*,
|
| 344 |
retry_level: int = 0,
|
| 345 |
-
) -> QueryParam:
|
| 346 |
base_top_k = int(os.getenv("TOP_K", "40"))
|
| 347 |
base_chunk_top_k = int(os.getenv("CHUNK_TOP_K", "20"))
|
| 348 |
|
|
@@ -425,7 +448,9 @@ async def query_rag(
|
|
| 425 |
if verdict in {"supported", ""} or not isinstance(unsupported_claims, list):
|
| 426 |
return ""
|
| 427 |
|
| 428 |
-
cleaned_claims = [
|
|
|
|
|
|
|
| 429 |
if not cleaned_claims:
|
| 430 |
return ""
|
| 431 |
|
|
@@ -464,7 +489,9 @@ async def query_rag(
|
|
| 464 |
break
|
| 465 |
|
| 466 |
if selected_result is None:
|
| 467 |
-
return
|
|
|
|
|
|
|
| 468 |
|
| 469 |
answer_text = _extract_llm_text(selected_result)
|
| 470 |
references = _extract_references(selected_result)
|
|
@@ -481,7 +508,9 @@ async def query_rag(
|
|
| 481 |
|
| 482 |
def parse_args() -> argparse.Namespace:
|
| 483 |
parser = argparse.ArgumentParser(description="LightRAG over the Chomsky corpus")
|
| 484 |
-
parser.add_argument(
|
|
|
|
|
|
|
| 485 |
parser.add_argument("--query", type=str, help="Question to ask")
|
| 486 |
parser.add_argument(
|
| 487 |
"--mode",
|
|
@@ -490,7 +519,9 @@ def parse_args() -> argparse.Namespace:
|
|
| 490 |
choices=["naive", "local", "global", "hybrid", "mix"],
|
| 491 |
help="LightRAG query mode",
|
| 492 |
)
|
| 493 |
-
parser.add_argument(
|
|
|
|
|
|
|
| 494 |
parser.add_argument(
|
| 495 |
"--working-dir",
|
| 496 |
type=str,
|
|
@@ -502,11 +533,15 @@ def parse_args() -> argparse.Namespace:
|
|
| 502 |
|
| 503 |
async def run_cli(args: argparse.Namespace) -> None:
|
| 504 |
if args.ingest:
|
| 505 |
-
count = await ingest_corpus(
|
|
|
|
|
|
|
| 506 |
print(f"Indexed {count} documents into LightRAG store: {args.working_dir}")
|
| 507 |
|
| 508 |
if args.query:
|
| 509 |
-
answer = await query_rag(
|
|
|
|
|
|
|
| 510 |
print(f"\nQ: {args.query}")
|
| 511 |
print(f"\nA: {answer}")
|
| 512 |
|
|
@@ -516,4 +551,3 @@ async def run_cli(args: argparse.Namespace) -> None:
|
|
| 516 |
|
| 517 |
if __name__ == "__main__":
|
| 518 |
asyncio.run(run_cli(parse_args()))
|
| 519 |
-
|
|
|
|
| 6 |
import re
|
| 7 |
import sys
|
| 8 |
from functools import lru_cache
|
| 9 |
+
from typing import Any, TYPE_CHECKING
|
| 10 |
|
| 11 |
|
| 12 |
def ensure_project_venv() -> None:
|
|
|
|
| 30 |
import numpy as np
|
| 31 |
from datasets import load_dataset
|
| 32 |
from dotenv import load_dotenv
|
| 33 |
+
|
| 34 |
+
if TYPE_CHECKING:
|
| 35 |
+
# Imported only for type checking; the actual import of
|
| 36 |
+
# SentenceTransformer happens lazily inside get_embedder to
|
| 37 |
+
# keep module import (and thus API startup) lightweight.
|
| 38 |
+
from sentence_transformers import SentenceTransformer
|
| 39 |
|
| 40 |
|
| 41 |
load_dotenv()
|
|
|
|
| 66 |
configure_logging()
|
| 67 |
|
| 68 |
|
| 69 |
+
if TYPE_CHECKING:
|
| 70 |
+
# These imports are heavy (transitively pull in torch, CUDA, etc.).
|
| 71 |
+
# Import them only for type checking; at runtime we import lazily.
|
| 72 |
+
from lightrag import LightRAG, QueryParam
|
| 73 |
+
from lightrag.llm.openai import openai_complete_if_cache
|
| 74 |
+
from lightrag.utils import EmbeddingFunc
|
| 75 |
|
| 76 |
+
# LightRAG configures its own logger during import, so apply our level again
|
| 77 |
+
# once we actually import it lazily at runtime (see initialize_rag).
|
| 78 |
configure_logging()
|
| 79 |
|
| 80 |
|
|
|
|
| 151 |
|
| 152 |
|
| 153 |
@lru_cache(maxsize=1)
|
| 154 |
+
def get_embedder() -> "SentenceTransformer":
|
| 155 |
+
# Lazy import avoids loading heavy ML stacks during module import,
|
| 156 |
+
# which helps services like Render bind the HTTP port quickly.
|
| 157 |
+
from sentence_transformers import SentenceTransformer
|
| 158 |
+
|
| 159 |
return SentenceTransformer(EMBED_MODEL)
|
| 160 |
|
| 161 |
|
|
|
|
| 179 |
keyword_extraction=False,
|
| 180 |
**kwargs,
|
| 181 |
) -> str:
|
| 182 |
+
# Import here to avoid pulling in heavy dependencies during module import.
|
| 183 |
+
from lightrag.llm.openai import openai_complete_if_cache
|
| 184 |
+
|
| 185 |
api_key = os.getenv("openrouter_key")
|
| 186 |
if not api_key:
|
| 187 |
raise ValueError("Missing openrouter_key in .env")
|
|
|
|
| 202 |
)
|
| 203 |
|
| 204 |
|
| 205 |
+
async def initialize_rag(working_dir: str = DEFAULT_WORKING_DIR) -> "LightRAG":
|
| 206 |
+
# Lazy imports keep startup fast and avoid loading the full
|
| 207 |
+
# LightRAG/torch stack until we actually need RAG functionality.
|
| 208 |
+
from lightrag import LightRAG
|
| 209 |
+
from lightrag.utils import EmbeddingFunc
|
| 210 |
+
|
| 211 |
os.makedirs(working_dir, exist_ok=True)
|
| 212 |
|
| 213 |
rag = LightRAG(
|
|
|
|
| 243 |
return texts
|
| 244 |
|
| 245 |
|
| 246 |
+
async def ingest_corpus(
|
| 247 |
+
doc_limit: int = 200, working_dir: str = DEFAULT_WORKING_DIR
|
| 248 |
+
) -> int:
|
| 249 |
rag = None
|
| 250 |
try:
|
| 251 |
rag = await initialize_rag(working_dir)
|
|
|
|
| 365 |
rewritten_question: str,
|
| 366 |
*,
|
| 367 |
retry_level: int = 0,
|
| 368 |
+
) -> "QueryParam":
|
| 369 |
base_top_k = int(os.getenv("TOP_K", "40"))
|
| 370 |
base_chunk_top_k = int(os.getenv("CHUNK_TOP_K", "20"))
|
| 371 |
|
|
|
|
| 448 |
if verdict in {"supported", ""} or not isinstance(unsupported_claims, list):
|
| 449 |
return ""
|
| 450 |
|
| 451 |
+
cleaned_claims = [
|
| 452 |
+
str(c).strip() for c in unsupported_claims if str(c).strip()
|
| 453 |
+
][:5]
|
| 454 |
if not cleaned_claims:
|
| 455 |
return ""
|
| 456 |
|
|
|
|
| 489 |
break
|
| 490 |
|
| 491 |
if selected_result is None:
|
| 492 |
+
return (
|
| 493 |
+
"I do not have enough information to answer from the retrieved corpus."
|
| 494 |
+
)
|
| 495 |
|
| 496 |
answer_text = _extract_llm_text(selected_result)
|
| 497 |
references = _extract_references(selected_result)
|
|
|
|
| 508 |
|
| 509 |
def parse_args() -> argparse.Namespace:
|
| 510 |
parser = argparse.ArgumentParser(description="LightRAG over the Chomsky corpus")
|
| 511 |
+
parser.add_argument(
|
| 512 |
+
"--ingest", action="store_true", help="Index dataset into LightRAG"
|
| 513 |
+
)
|
| 514 |
parser.add_argument("--query", type=str, help="Question to ask")
|
| 515 |
parser.add_argument(
|
| 516 |
"--mode",
|
|
|
|
| 519 |
choices=["naive", "local", "global", "hybrid", "mix"],
|
| 520 |
help="LightRAG query mode",
|
| 521 |
)
|
| 522 |
+
parser.add_argument(
|
| 523 |
+
"--doc-limit", type=int, default=200, help="How many docs to index"
|
| 524 |
+
)
|
| 525 |
parser.add_argument(
|
| 526 |
"--working-dir",
|
| 527 |
type=str,
|
|
|
|
| 533 |
|
| 534 |
async def run_cli(args: argparse.Namespace) -> None:
|
| 535 |
if args.ingest:
|
| 536 |
+
count = await ingest_corpus(
|
| 537 |
+
doc_limit=args.doc_limit, working_dir=args.working_dir
|
| 538 |
+
)
|
| 539 |
print(f"Indexed {count} documents into LightRAG store: {args.working_dir}")
|
| 540 |
|
| 541 |
if args.query:
|
| 542 |
+
answer = await query_rag(
|
| 543 |
+
args.query, mode=args.mode, working_dir=args.working_dir
|
| 544 |
+
)
|
| 545 |
print(f"\nQ: {args.query}")
|
| 546 |
print(f"\nA: {answer}")
|
| 547 |
|
|
|
|
| 551 |
|
| 552 |
if __name__ == "__main__":
|
| 553 |
asyncio.run(run_cli(parse_args()))
|
|
|