Commit 2aa0b72 by imtrt004 · Parent(s): 7997082

fix: update backend lib with log

Files changed (4):
  1. app.py           +46 -9
  2. model/loader.py  +70 -31
  3. model/log.py     +154 -0
  4. requirements.txt +1 -0
app.py CHANGED
@@ -7,9 +7,12 @@ from supabase import create_client
 import uuid
 import os
 import json
+import time
+from datetime import datetime, timezone
 from typing import Optional
 
 from model.loader import get_llm, get_model_name, is_llm_ready, switch_model, is_loading, get_loading_status
+from model.log import banner, section, step, ok, warn, error
 from retrieval.embedder import get_model, embed_chunks, embed_query
 from retrieval.vectorstore import (
     store_chunks, similarity_search, similarity_search_multi,
@@ -50,17 +53,28 @@ def _supa():
 @asynccontextmanager
 async def lifespan(app: FastAPI):
     import asyncio
-    print("🚀 Starting up...", flush=True)
-    get_model()  # BGE-small embedding model (~2s)
-    print(" ✔ Embedding model ready", flush=True)
-    # Load the LLM in a thread so the event loop stays responsive
+    ts = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC")
+    banner(f"LUMINARY BACKEND · {ts}")
+
+    section("STARTUP", "Embedding model")
+    step("STARTUP", "Loading BAAI/bge-small-en-v1.5…")
+    try:
+        t0 = time.perf_counter()
+        get_model()
+        ok("STARTUP", f"Embedding model ready ({time.perf_counter() - t0:.1f}s)")
+    except Exception as exc:
+        error("STARTUP", f"Embedding model failed: {exc}")
+
+    section("STARTUP", "LLM")
+    step("STARTUP", f"Loading {get_model_name()} in background thread…")
     loop = asyncio.get_event_loop()
    try:
         await loop.run_in_executor(None, get_llm)
-        print(f" ✔ LLM ready ({get_model_name()})", flush=True)
+        ok("STARTUP", f"LLM ready → {get_model_name()}")
     except Exception as exc:
-        print(f" ⚠ LLM load failed: {exc}", flush=True)
-    print("✅ Ready", flush=True)
+        error("STARTUP", f"LLM load failed: {exc}")
+
+    section("STARTUP", "All systems go")
     yield
@@ -83,8 +97,10 @@ async def upload(
     user_id: str,
     bg: BackgroundTasks,
 ):
     content = await file.read()
     file_size = len(content)
+    size_kb = file_size / 1024
+    step("UPLOAD", f"{file.filename} · {size_kb:.0f} KB · user={user_id[:8]}")
 
-    ok, msg = can_upload(user_id, file_size)
-    if not ok:
+    allowed, msg = can_upload(user_id, file_size)  # local renamed so it no longer shadows the ok() log helper
+    if not allowed:
@@ -93,6 +109,7 @@ async def upload(
     # ── Storage capacity gate ─────────────────────────────────────────────
     if is_storage_near_full(file_size):
         # Queue the upload; it will be processed once expired docs are purged
+        warn("UPLOAD", "Storage near full — queueing upload")
         result = enqueue_upload(
             user_id=user_id,
             filename=file.filename or "upload",
@@ -127,7 +144,7 @@ async def upload(
 
     # Process in background (parse → chunk → embed → store)
     bg.add_task(_process_doc, content, doc_id, user_id, expires, file.filename)
-
+    ok("UPLOAD", f"Accepted · doc={doc_id[:8]} · expires={expires.date()}")
     return {"doc_id": doc_id, "status": "processing", "expires_at": expires.isoformat()}
 
 
@@ -200,14 +217,33 @@ async def process_from_storage(
 
 async def _process_doc(content, doc_id, user_id, expires, filename):
     supa = _supa()
+    t0 = time.perf_counter()
+    short_id = doc_id[:8]
+    section("PROCESS", f"{filename} [{short_id}]")
     try:
+        step("PROCESS", f"Parsing {filename}")
         pages = parse_file_pages(content, filename)
+        ok("PROCESS", f"Parsed → {len(pages)} page(s)")
+
+        step("PROCESS", "Chunking pages…")
         chunks = smart_chunk_pages(pages, filename=filename)
+        ok("PROCESS", f"Chunked → {len(chunks)} chunk(s)")
+
+        step("PROCESS", f"Embedding {len(chunks)} chunks…")
         embeds = embed_chunks([c.text for c in chunks])
+        ok("PROCESS", f"Embedded ({len(embeds)} vectors)")
+
+        step("PROCESS", "Storing vectors in Supabase…")
         store_chunks(doc_id, user_id, chunks, embeds, expires)
+
         supa.table("documents").update({"status": "ready", "chunk_count": len(chunks)}) \
             .eq("id", doc_id).execute()
+
+        elapsed = time.perf_counter() - t0
+        ok("PROCESS", f"Document ready · {len(chunks)} chunks · {elapsed:.2f}s [{short_id}]")
+
     except Exception as e:
+        error("PROCESS", f"{filename} [{short_id}] → {e}")
         supa.table("documents").update({"status": "error", "error": str(e)}) \
             .eq("id", doc_id).execute()
 
@@ -465,6 +501,7 @@ async def llm_switch(req: LLMSwitchRequest, bg: BackgroundTasks):
     if get_model_name() == req.model and is_llm_ready():
         return {"ok": True, "switching": False, "model": req.model, "msg": "Already active"}
 
+    step("SWITCH", f"Admin requested {get_model_name()} → {req.model}")
     bg.add_task(_do_switch_model, req.model)
     return {"ok": True, "switching": True, "model": req.model}
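Note on the timing pattern: the manual `t0 = time.perf_counter()` / `elapsed` bookkeeping in `_process_doc` could equally lean on the `Timer` helper this commit adds in model/log.py (see that file below). A minimal sketch under that assumption; `do_work` is a hypothetical stand-in for the real parse/chunk/embed steps:

    import time
    from model.log import Timer, step

    def do_work() -> None:
        # hypothetical stand-in for parse_file_pages / smart_chunk_pages / embed_chunks
        time.sleep(0.1)

    # Timer.__exit__ calls done(), which logs "✓ Pipeline complete (0.10s)" via ok()
    with Timer("PROCESS", "Pipeline complete"):
        step("PROCESS", "Working…")
        do_work()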
model/loader.py CHANGED
@@ -16,6 +16,7 @@ Model options (set LLM_MODEL env var in HF Space to switch, no redeploy needed):
 
 Note:
 - EXAONE requires trust_remote_code=True (LG AI custom architecture).
+  Requires transformers>=4.46.0 for RopeParameters support.
 - Llama 3.2 and Gemma 3 may require a HF_TOKEN env var (gated models).
 - Qwen3 supports /think and /no_think prefixes for reasoning depth control.
 """
@@ -25,6 +26,7 @@ import time
 import threading
 import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM
+from model.log import section, step, ok, warn, error
 
 MODEL_ID = os.environ.get("LLM_MODEL", "HuggingFaceTB/SmolLM2-360M-Instruct")
 
@@ -49,35 +51,63 @@ _switch_lock = threading.Lock()
 
 def _load() -> None:
     global _tokenizer, _llm, _llm_ready, _loading_msg
+
     if _llm is not None:
         return
 
-    t0 = time.time()
-    sep = "-" * 60
-    print(f"\n{sep}", flush=True)
-    print(f" Loading {MODEL_ID}", flush=True)
-    print(f" First boot downloads model weights then caches to disk.", flush=True)
-    print(f"{sep}\n", flush=True)
-
+    t0 = time.perf_counter()
     _trc = _needs_trust_remote_code(MODEL_ID)
 
-    _loading_msg = f"Loading tokenizer for {MODEL_ID}…"
-    _tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=_trc)
-    print(" Tokenizer loaded.", flush=True)
+    section("MODEL", f"Loading {MODEL_ID}")
+
+    if _trc:
+        step("MODEL", "trust_remote_code=True (custom architecture)")
+
+    # ── Tokenizer ─────────────────────────────────────────────────────────────
+    _loading_msg = f"Loading tokenizer…"
+    step("MODEL", f"Fetching tokenizer…")
+    try:
+        _tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=_trc)
+        ok("MODEL", "Tokenizer loaded")
+    except Exception as exc:
+        error("MODEL", f"Tokenizer load failed → {exc}")
+        raise
 
-    _loading_msg = f"Loading model weights for {MODEL_ID}… (may download on first run)"
-    _llm = AutoModelForCausalLM.from_pretrained(
-        MODEL_ID,
-        torch_dtype=torch.bfloat16,
-        trust_remote_code=_trc,
-    )
+    # ── Weights ───────────────────────────────────────────────────────────────
+    _loading_msg = "Loading model weights… (downloads on first run, then cached)"
+    step("MODEL", "Loading weights (first run will download — subsequent boots use cache)")
+
+    device_info = "CUDA" if torch.cuda.is_available() else "CPU"
+    step("MODEL", f"Device: {device_info} · dtype: bfloat16")
+
+    try:
+        _llm = AutoModelForCausalLM.from_pretrained(
+            MODEL_ID,
+            torch_dtype=torch.bfloat16,
+            trust_remote_code=_trc,
+        )
+    except ImportError as exc:
+        _hint = ""
+        if "RopeParameters" in str(exc):
+            _hint = (
+                "\n  Hint: EXAONE-3.5 requires transformers>=4.46.0.\n"
+                "  Ensure requirements.txt contains transformers>=4.46.0\n"
+                "  and rebuild/restart the Space."
+            )
+        error("MODEL", f"{exc}{_hint}")
+        raise
+    except Exception as exc:
+        error("MODEL", str(exc))
+        raise
 
     _llm.eval()
     _llm_ready = True
     _loading_msg = ""
-    elapsed = time.time() - t0
-    print(f"\n{sep}", flush=True)
-    print(f" {MODEL_ID} ready in {elapsed:.1f}s", flush=True)
-    print(f"{sep}\n", flush=True)
+
+    elapsed = time.perf_counter() - t0
+    params = sum(p.numel() for p in _llm.parameters()) / 1e6
+    ok("MODEL", f"Ready · {params:.0f}M params · {elapsed:.1f}s")
+    section("MODEL", "Model online")
 
 
 def get_tokenizer() -> AutoTokenizer:
@@ -116,32 +146,41 @@ def switch_model(new_model_id: str) -> None:
     global _loading, _loading_msg, _loading_error
 
     with _switch_lock:
+        prev = MODEL_ID
+        section("SWITCH", f"{prev} → {new_model_id}")
+
         _loading = True
         _loading_error = None
-        _loading_msg = f"Unloading {MODEL_ID}…"
+        _loading_msg = f"Unloading {prev}…"
         _llm_ready = False
 
-        # Release model from memory
+        # ── Release current model from memory ─────────────────────────────────
+        step("SWITCH", f"Unloading {prev}")
         try:
             import gc
             _llm = None
             _tokenizer = None
             gc.collect()
             if torch.cuda.is_available():
                 torch.cuda.empty_cache()
-        except Exception:
-            pass
+                step("SWITCH", "CUDA cache cleared")
+            ok("SWITCH", "Memory freed")
+        except Exception as exc:
+            warn("SWITCH", f"Cleanup warning: {exc}")
 
         MODEL_ID = new_model_id
+        step("SWITCH", f"Starting load of {new_model_id}")
 
         try:
-            _load()  # uses updated MODULE_ID; sets _llm_ready = True
+            _load()  # uses updated MODEL_ID; sets _llm_ready = True
             _loading = False
+            ok("SWITCH", f"Switch complete → {new_model_id}")
         except Exception as exc:
            _loading_error = str(exc)
             _loading = False
             _loading_msg = ""
+            error("SWITCH", f"Failed to load {new_model_id}\n  {exc}")
 
 
 def is_llm_ready() -> bool:
     return _llm_ready
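Note: `_load()` is idempotent (the `if _llm is not None: return` guard), with `_switch_lock` serializing switches at the caller. A self-contained sketch of that lazy-singleton shape, purely illustrative of the pattern rather than this module's API (`_heavy_load` is a toy stand-in):

    import threading

    _instance = None
    _lock = threading.Lock()

    def _heavy_load() -> str:
        return "weights"  # stand-in for AutoModelForCausalLM.from_pretrained(...)

    def get_instance() -> str:
        """First caller pays the load cost; later callers get the cached object."""
        global _instance
        if _instance is None:          # cheap check before taking the lock
            with _lock:
                if _instance is None:  # re-check: another thread may have loaded it
                    _instance = _heavy_load()
        return _instance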
model/log.py ADDED
@@ -0,0 +1,154 @@
+"""
+Shared structured logger for the Luminary HF backend.
+Outputs readable, sectioned logs that are easy to scan in the HF Space container view.
+"""
+
+from __future__ import annotations
+import logging
+import sys
+import time
+from datetime import datetime, timezone
+
+# ── ANSI colour palette ──────────────────────────────────────────────────────
+_R = "\033[0m"     # reset
+_B = "\033[1m"     # bold
+_DIM = "\033[2m"   # dim
+_GRN = "\033[32m"  # green
+_CYN = "\033[36m"  # cyan
+_YLW = "\033[33m"  # yellow
+_RED = "\033[31m"  # red
+_MAG = "\033[35m"  # magenta
+_BLU = "\033[34m"  # blue
+_WHT = "\033[97m"  # bright white
+
+_TAG_COLORS: dict[str, str] = {
+    "STARTUP": _CYN,
+    "MODEL": _MAG,
+    "UPLOAD": _BLU,
+    "PROCESS": _BLU,
+    "CHAT": _GRN,
+    "QUIZ": _GRN,
+    "SWITCH": _YLW,
+    "ERROR": _RED,
+    "HEALTH": _DIM,
+}
+
+
+class _FmtHandler(logging.StreamHandler):
+    """Formatter that wraps log records into readable tag-prefixed lines."""
+
+    def emit(self, record: logging.LogRecord) -> None:
+        try:
+            tag = getattr(record, "tag", record.levelname)
+            msg = record.getMessage()
+            color = _TAG_COLORS.get(tag, _WHT)
+            ts = datetime.now(timezone.utc).strftime("%H:%M:%S")
+            prefix = f"{_DIM}{ts}{_R} {color}{_B}[{tag}]{_R}"
+            # indent continuation lines
+            lines = msg.splitlines()
+            out = prefix + " " + lines[0]
+            for line in lines[1:]:
+                out += "\n" + (" " * (len(ts) + len(tag) + 5)) + line
+            sys.stdout.write(out + "\n")
+            sys.stdout.flush()
+        except Exception:
+            self.handleError(record)
+
+
+# ── Module-level logger setup ─────────────────────────────────────────────────
+_handler = _FmtHandler()
+_handler.setFormatter(logging.Formatter("%(message)s"))
+
+log = logging.getLogger("luminary")
+log.setLevel(logging.DEBUG)
+if not log.handlers:
+    log.addHandler(_handler)
+log.propagate = False
+
+
+# ── Convenience helpers ───────────────────────────────────────────────────────
+
+def _tag(tag: str) -> dict:
+    return {"extra": {"tag": tag}}
+
+
+def banner(title: str, width: int = 58) -> None:
+    """Print a prominent box banner (e.g. at startup)."""
+    bar = "━" * width
+    inner = title.center(width)
+    sys.stdout.write(
+        f"\n{_B}{_CYN}┌{bar}┐\n"
+        f"│{_WHT}{_B}{inner}{_CYN}│\n"
+        f"└{bar}┘{_R}\n\n"
+    )
+    sys.stdout.flush()
+
+
+def section(tag: str, msg: str) -> None:
+    """Print a thin divider line with an annotation."""
+    color = _TAG_COLORS.get(tag, _WHT)
+    ts = datetime.now(timezone.utc).strftime("%H:%M:%S")
+    width = max(0, 58 - len(tag) - len(msg) - 4)
+    bar = "─" * width
+    sys.stdout.write(f"{_DIM}{ts}{_R} {color}{_B}[{tag}]{_R} {_DIM}{msg} {bar}{_R}\n")
+    sys.stdout.flush()
+
+
+def ok(tag: str, msg: str) -> None:
+    color = _TAG_COLORS.get(tag, _WHT)
+    ts = datetime.now(timezone.utc).strftime("%H:%M:%S")
+    sys.stdout.write(f"{_DIM}{ts}{_R} {color}{_B}[{tag}]{_R} {_GRN}✓{_R} {msg}\n")
+    sys.stdout.flush()
+
+
+def step(tag: str, msg: str) -> None:
+    color = _TAG_COLORS.get(tag, _WHT)
+    ts = datetime.now(timezone.utc).strftime("%H:%M:%S")
+    sys.stdout.write(f"{_DIM}{ts}{_R} {color}{_B}[{tag}]{_R} {_DIM}→{_R} {msg}\n")
+    sys.stdout.flush()
+
+
+def warn(tag: str, msg: str) -> None:
+    color = _TAG_COLORS.get(tag, _WHT)
+    ts = datetime.now(timezone.utc).strftime("%H:%M:%S")
+    sys.stdout.write(f"{_DIM}{ts}{_R} {color}{_B}[{tag}]{_R} {_YLW}⚠{_R} {_YLW}{msg}{_R}\n")
+    sys.stdout.flush()
+
+
+def error(tag: str, msg: str) -> None:
+    ts = datetime.now(timezone.utc).strftime("%H:%M:%S")
+    sys.stdout.write(
+        f"{_DIM}{ts}{_R} {_RED}{_B}[{tag}]{_R} "
+        f"{_RED}✕ Error{_R}\n"
+        f"{' ' * (len(ts) + len(tag) + 5)}{_RED}{msg}{_R}\n"
+    )
+    sys.stdout.flush()
+
+
+class Timer:
+    """Context manager / manual stopwatch with labelled output."""
+    def __init__(self, tag: str, label: str) -> None:
+        self.tag = tag
+        self.label = label
+        self._t0: float = 0.0
+
+    def start(self) -> "Timer":
+        self._t0 = time.perf_counter()
+        return self
+
+    def elapsed(self) -> float:
+        return time.perf_counter() - self._t0
+
+    def done(self, extra: str = "") -> float:
+        secs = self.elapsed()
+        msg = f"{self.label} {_DIM}({secs:.2f}s){_R}"
+        if extra:
+            msg += f" {_DIM}{extra}{_R}"
+        ok(self.tag, msg)
+        return secs
+
+    def __enter__(self) -> "Timer":
+        return self.start()
+
+    def __exit__(self, *_) -> None:
+        self.done()
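Note: the helpers compose directly. A quick end-to-end look at the API this file exposes; the calls mirror the ones wired into app.py above, and the output format follows the f-strings in this module:

    from model.log import banner, section, step, ok, warn, error, Timer

    banner("LUMINARY BACKEND")
    section("PROCESS", "demo.pdf [deadbeef]")
    step("PROCESS", "Parsing demo.pdf")
    ok("PROCESS", "Parsed → 3 page(s)")
    warn("UPLOAD", "Storage near full — queueing upload")
    error("MODEL", "example failure")

    with Timer("PROCESS", "Embedded 12 chunks"):
        pass  # timed work goes here; logs "✓ Embedded 12 chunks (0.00s)" on exit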
requirements.txt CHANGED
@@ -1,6 +1,7 @@
 fastapi
 uvicorn[standard]==0.34.0
 sentence-transformers==4.1.0
+transformers>=4.46.0
 huggingface-hub>=0.31.0
 supabase==2.13.0
 pymupdf==1.25.3
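Note: the new transformers floor only helps if the built image actually picks it up; one way to fail fast at boot is a version assertion. A minimal sketch, not part of this commit (`packaging` is assumed available, as it is in most pip environments):

    import transformers
    from packaging.version import Version

    # Guards against a stale Space image built before this requirements change.
    assert Version(transformers.__version__) >= Version("4.46.0"), (
        f"transformers {transformers.__version__} < 4.46.0; EXAONE-3.5 will fail to load"
    )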