mmoise00 committed on
Commit
fe55c4e
·
1 Parent(s): f6d0241

lazy-load RAG deps and pin frontend versions

Browse files
Files changed (3) hide show
  1. backend/api.py +1 -2
  2. frontend/package.json +8 -8
  3. main.py +51 -17
backend/api.py CHANGED
@@ -34,7 +34,6 @@ from pydantic import BaseModel
34
  from main import (
35
  CITATION_SYSTEM_PROMPT,
36
  DEFAULT_WORKING_DIR,
37
- QueryParam,
38
  initialize_rag,
39
  llm_model_func,
40
  query_rag,
@@ -343,7 +342,7 @@ def _looks_like_no_answer(answer: str) -> bool:
343
 
344
  def _dynamic_query_param(
345
  mode: str, original: str, rewritten: str, retry_level: int
346
- ) -> QueryParam:
347
  base_top_k = int(os.getenv("TOP_K", "40"))
348
  base_chunk_top_k = int(os.getenv("CHUNK_TOP_K", "20"))
349
  text = f"{original} {rewritten}".lower()
 
34
  from main import (
35
  CITATION_SYSTEM_PROMPT,
36
  DEFAULT_WORKING_DIR,
 
37
  initialize_rag,
38
  llm_model_func,
39
  query_rag,
 
342
 
343
  def _dynamic_query_param(
344
  mode: str, original: str, rewritten: str, retry_level: int
345
+ ) -> "QueryParam":
346
  base_top_k = int(os.getenv("TOP_K", "40"))
347
  base_chunk_top_k = int(os.getenv("CHUNK_TOP_K", "20"))
348
  text = f"{original} {rewritten}".lower()
frontend/package.json CHANGED
@@ -8,18 +8,18 @@
8
  "start": "next start"
9
  },
10
  "dependencies": {
11
- "@xyflow/react": "^12.10.2",
12
  "next": "16.2.1",
13
  "react": "19.2.4",
14
  "react-dom": "19.2.4",
15
- "react-markdown": "^10.1.0"
16
  },
17
  "devDependencies": {
18
- "@tailwindcss/postcss": "^4",
19
- "@types/node": "^20",
20
- "@types/react": "^19",
21
- "@types/react-dom": "^19",
22
- "tailwindcss": "^4",
23
- "typescript": "^5"
24
  }
25
  }
 
8
  "start": "next start"
9
  },
10
  "dependencies": {
11
+ "@xyflow/react": "12.10.2",
12
  "next": "16.2.1",
13
  "react": "19.2.4",
14
  "react-dom": "19.2.4",
15
+ "react-markdown": "10.1.0"
16
  },
17
  "devDependencies": {
18
+ "@tailwindcss/postcss": "4.0.0",
19
+ "@types/node": "20.0.0",
20
+ "@types/react": "19.0.0",
21
+ "@types/react-dom": "19.0.0",
22
+ "tailwindcss": "4.0.0",
23
+ "typescript": "5.0.0"
24
  }
25
  }
main.py CHANGED
@@ -6,7 +6,7 @@ import os
6
  import re
7
  import sys
8
  from functools import lru_cache
9
- from typing import Any
10
 
11
 
12
  def ensure_project_venv() -> None:
@@ -30,7 +30,12 @@ ensure_project_venv()
30
  import numpy as np
31
  from datasets import load_dataset
32
  from dotenv import load_dotenv
33
- from sentence_transformers import SentenceTransformer
 
 
 
 
 
34
 
35
 
36
  load_dotenv()
@@ -61,11 +66,15 @@ def configure_logging() -> None:
61
  configure_logging()
62
 
63
 
64
- from lightrag import LightRAG, QueryParam
65
- from lightrag.llm.openai import openai_complete_if_cache
66
- from lightrag.utils import EmbeddingFunc
 
 
 
67
 
68
- # LightRAG configures its own logger during import, so apply our level again.
 
69
  configure_logging()
70
 
71
 
@@ -142,7 +151,11 @@ def configure_langfuse() -> bool:
142
 
143
 
144
  @lru_cache(maxsize=1)
145
- def get_embedder() -> SentenceTransformer:
 
 
 
 
146
  return SentenceTransformer(EMBED_MODEL)
147
 
148
 
@@ -166,6 +179,9 @@ async def llm_model_func(
166
  keyword_extraction=False,
167
  **kwargs,
168
  ) -> str:
 
 
 
169
  api_key = os.getenv("openrouter_key")
170
  if not api_key:
171
  raise ValueError("Missing openrouter_key in .env")
@@ -186,7 +202,12 @@ async def llm_model_func(
186
  )
187
 
188
 
189
- async def initialize_rag(working_dir: str = DEFAULT_WORKING_DIR) -> LightRAG:
 
 
 
 
 
190
  os.makedirs(working_dir, exist_ok=True)
191
 
192
  rag = LightRAG(
@@ -222,7 +243,9 @@ def load_corpus_texts(limit: int) -> list[str]:
222
  return texts
223
 
224
 
225
- async def ingest_corpus(doc_limit: int = 200, working_dir: str = DEFAULT_WORKING_DIR) -> int:
 
 
226
  rag = None
227
  try:
228
  rag = await initialize_rag(working_dir)
@@ -342,7 +365,7 @@ async def query_rag(
342
  rewritten_question: str,
343
  *,
344
  retry_level: int = 0,
345
- ) -> QueryParam:
346
  base_top_k = int(os.getenv("TOP_K", "40"))
347
  base_chunk_top_k = int(os.getenv("CHUNK_TOP_K", "20"))
348
 
@@ -425,7 +448,9 @@ async def query_rag(
425
  if verdict in {"supported", ""} or not isinstance(unsupported_claims, list):
426
  return ""
427
 
428
- cleaned_claims = [str(c).strip() for c in unsupported_claims if str(c).strip()][:5]
 
 
429
  if not cleaned_claims:
430
  return ""
431
 
@@ -464,7 +489,9 @@ async def query_rag(
464
  break
465
 
466
  if selected_result is None:
467
- return "I do not have enough information to answer from the retrieved corpus."
 
 
468
 
469
  answer_text = _extract_llm_text(selected_result)
470
  references = _extract_references(selected_result)
@@ -481,7 +508,9 @@ async def query_rag(
481
 
482
  def parse_args() -> argparse.Namespace:
483
  parser = argparse.ArgumentParser(description="LightRAG over the Chomsky corpus")
484
- parser.add_argument("--ingest", action="store_true", help="Index dataset into LightRAG")
 
 
485
  parser.add_argument("--query", type=str, help="Question to ask")
486
  parser.add_argument(
487
  "--mode",
@@ -490,7 +519,9 @@ def parse_args() -> argparse.Namespace:
490
  choices=["naive", "local", "global", "hybrid", "mix"],
491
  help="LightRAG query mode",
492
  )
493
- parser.add_argument("--doc-limit", type=int, default=200, help="How many docs to index")
 
 
494
  parser.add_argument(
495
  "--working-dir",
496
  type=str,
@@ -502,11 +533,15 @@ def parse_args() -> argparse.Namespace:
502
 
503
  async def run_cli(args: argparse.Namespace) -> None:
504
  if args.ingest:
505
- count = await ingest_corpus(doc_limit=args.doc_limit, working_dir=args.working_dir)
 
 
506
  print(f"Indexed {count} documents into LightRAG store: {args.working_dir}")
507
 
508
  if args.query:
509
- answer = await query_rag(args.query, mode=args.mode, working_dir=args.working_dir)
 
 
510
  print(f"\nQ: {args.query}")
511
  print(f"\nA: {answer}")
512
 
@@ -516,4 +551,3 @@ async def run_cli(args: argparse.Namespace) -> None:
516
 
517
  if __name__ == "__main__":
518
  asyncio.run(run_cli(parse_args()))
519
-
 
6
  import re
7
  import sys
8
  from functools import lru_cache
9
+ from typing import Any, TYPE_CHECKING
10
 
11
 
12
  def ensure_project_venv() -> None:
 
30
  import numpy as np
31
  from datasets import load_dataset
32
  from dotenv import load_dotenv
33
+
34
+ if TYPE_CHECKING:
35
+ # Imported only for type checking; the actual import of
36
+ # SentenceTransformer happens lazily inside get_embedder to
37
+ # keep module import (and thus API startup) lightweight.
38
+ from sentence_transformers import SentenceTransformer
39
 
40
 
41
  load_dotenv()
 
66
  configure_logging()
67
 
68
 
69
+ if TYPE_CHECKING:
70
+ # These imports are heavy (transitively pull in torch, CUDA, etc.).
71
+ # Import them only for type checking; at runtime we import lazily.
72
+ from lightrag import LightRAG, QueryParam
73
+ from lightrag.llm.openai import openai_complete_if_cache
74
+ from lightrag.utils import EmbeddingFunc
75
 
76
+ # LightRAG configures its own logger during import, so apply our level again
77
+ # once we actually import it lazily at runtime (see initialize_rag).
78
  configure_logging()
79
 
80
 
 
151
 
152
 
153
  @lru_cache(maxsize=1)
154
+ def get_embedder() -> "SentenceTransformer":
155
+ # Lazy import avoids loading heavy ML stacks during module import,
156
+ # which helps services like Render bind the HTTP port quickly.
157
+ from sentence_transformers import SentenceTransformer
158
+
159
  return SentenceTransformer(EMBED_MODEL)
160
 
161
 
 
179
  keyword_extraction=False,
180
  **kwargs,
181
  ) -> str:
182
+ # Import here to avoid pulling in heavy dependencies during module import.
183
+ from lightrag.llm.openai import openai_complete_if_cache
184
+
185
  api_key = os.getenv("openrouter_key")
186
  if not api_key:
187
  raise ValueError("Missing openrouter_key in .env")
 
202
  )
203
 
204
 
205
+ async def initialize_rag(working_dir: str = DEFAULT_WORKING_DIR) -> "LightRAG":
206
+ # Lazy imports keep startup fast and avoid loading the full
207
+ # LightRAG/torch stack until we actually need RAG functionality.
208
+ from lightrag import LightRAG
209
+ from lightrag.utils import EmbeddingFunc
210
+
211
  os.makedirs(working_dir, exist_ok=True)
212
 
213
  rag = LightRAG(
 
243
  return texts
244
 
245
 
246
+ async def ingest_corpus(
247
+ doc_limit: int = 200, working_dir: str = DEFAULT_WORKING_DIR
248
+ ) -> int:
249
  rag = None
250
  try:
251
  rag = await initialize_rag(working_dir)
 
365
  rewritten_question: str,
366
  *,
367
  retry_level: int = 0,
368
+ ) -> "QueryParam":
369
  base_top_k = int(os.getenv("TOP_K", "40"))
370
  base_chunk_top_k = int(os.getenv("CHUNK_TOP_K", "20"))
371
 
 
448
  if verdict in {"supported", ""} or not isinstance(unsupported_claims, list):
449
  return ""
450
 
451
+ cleaned_claims = [
452
+ str(c).strip() for c in unsupported_claims if str(c).strip()
453
+ ][:5]
454
  if not cleaned_claims:
455
  return ""
456
 
 
489
  break
490
 
491
  if selected_result is None:
492
+ return (
493
+ "I do not have enough information to answer from the retrieved corpus."
494
+ )
495
 
496
  answer_text = _extract_llm_text(selected_result)
497
  references = _extract_references(selected_result)
 
508
 
509
  def parse_args() -> argparse.Namespace:
510
  parser = argparse.ArgumentParser(description="LightRAG over the Chomsky corpus")
511
+ parser.add_argument(
512
+ "--ingest", action="store_true", help="Index dataset into LightRAG"
513
+ )
514
  parser.add_argument("--query", type=str, help="Question to ask")
515
  parser.add_argument(
516
  "--mode",
 
519
  choices=["naive", "local", "global", "hybrid", "mix"],
520
  help="LightRAG query mode",
521
  )
522
+ parser.add_argument(
523
+ "--doc-limit", type=int, default=200, help="How many docs to index"
524
+ )
525
  parser.add_argument(
526
  "--working-dir",
527
  type=str,
 
533
 
534
  async def run_cli(args: argparse.Namespace) -> None:
535
  if args.ingest:
536
+ count = await ingest_corpus(
537
+ doc_limit=args.doc_limit, working_dir=args.working_dir
538
+ )
539
  print(f"Indexed {count} documents into LightRAG store: {args.working_dir}")
540
 
541
  if args.query:
542
+ answer = await query_rag(
543
+ args.query, mode=args.mode, working_dir=args.working_dir
544
+ )
545
  print(f"\nQ: {args.query}")
546
  print(f"\nA: {answer}")
547
 
 
551
 
552
  if __name__ == "__main__":
553
  asyncio.run(run_cli(parse_args()))