imtrt004 committed
Commit 2aa0b72 · 1 parent: 7997082

fix: update backend lib with log

Files changed:
- app.py            +46 -9
- model/loader.py   +70 -31
- model/log.py      +154 -0
- requirements.txt  +1 -0
app.py
CHANGED

@@ -7,9 +7,12 @@ from supabase import create_client
 import uuid
 import os
 import json
+import time
+from datetime import datetime, timezone
 from typing import Optional
 
 from model.loader import get_llm, get_model_name, is_llm_ready, switch_model, is_loading, get_loading_status
+from model.log import banner, section, step, ok, warn, error
 from retrieval.embedder import get_model, embed_chunks, embed_query
 from retrieval.vectorstore import (
     store_chunks, similarity_search, similarity_search_multi,

@@ -50,17 +53,28 @@ def _supa():
 @asynccontextmanager
 async def lifespan(app: FastAPI):
     import asyncio
-    … (4 removed lines, content not captured)
+    ts = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC")
+    banner(f"LUMINARY BACKEND · {ts}")
+
+    section("STARTUP", "Embedding model")
+    step("STARTUP", "Loading BAAI/bge-small-en-v1.5…")
+    try:
+        t0 = time.perf_counter()
+        get_model()
+        ok("STARTUP", f"Embedding model ready ({time.perf_counter() - t0:.1f}s)")
+    except Exception as exc:
+        error("STARTUP", f"Embedding model failed: {exc}")
+
+    section("STARTUP", "LLM")
+    step("STARTUP", f"Loading {get_model_name()} in background thread…")
     loop = asyncio.get_event_loop()
     try:
         await loop.run_in_executor(None, get_llm)
-    … (1 removed line, content not captured)
+        ok("STARTUP", f"LLM ready — {get_model_name()}")
     except Exception as exc:
-    … (2 removed lines, content not captured)
+        error("STARTUP", f"LLM load failed: {exc}")
+
+    section("STARTUP", "All systems go")
     yield

@@ -83,8 +97,10 @@ async def upload(
     user_id: str,
     bg: BackgroundTasks,
 ):
-    content …
+    content = await file.read()
     file_size = len(content)
+    size_kb = file_size / 1024
+    step("UPLOAD", f"{file.filename} · {size_kb:.0f} KB · user={user_id[:8]}")
 
     ok, msg = can_upload(user_id, file_size)
     if not ok:

@@ -93,6 +109,7 @@ async def upload(
     # ── Storage capacity gate ─────────────────────────────────────────────
     if is_storage_near_full(file_size):
         # Queue the upload; it will be processed once expired docs are purged
+        warn("UPLOAD", "Storage near full — queueing upload")
         result = enqueue_upload(
             user_id=user_id,
             filename=file.filename or "upload",

@@ -127,7 +144,7 @@ async def upload(
 
     # Process in background (parse → chunk → embed → store)
     bg.add_task(_process_doc, content, doc_id, user_id, expires, file.filename)
-    … (1 removed line, content not captured)
+    ok("UPLOAD", f"Accepted · doc={doc_id[:8]} · expires={expires.date()}")
     return {"doc_id": doc_id, "status": "processing", "expires_at": expires.isoformat()}

@@ -200,14 +217,33 @@ async def process_from_storage(
 
 async def _process_doc(content, doc_id, user_id, expires, filename):
     supa = _supa()
+    t0 = time.perf_counter()
+    short_id = doc_id[:8]
+    section("PROCESS", f"{filename} [{short_id}]")
     try:
+        step("PROCESS", f"Parsing {filename}")
         pages = parse_file_pages(content, filename)
+        ok("PROCESS", f"Parsed → {len(pages)} page(s)")
+
+        step("PROCESS", "Chunking pages…")
         chunks = smart_chunk_pages(pages, filename=filename)
+        ok("PROCESS", f"Chunked → {len(chunks)} chunk(s)")
+
+        step("PROCESS", f"Embedding {len(chunks)} chunks…")
         embeds = embed_chunks([c.text for c in chunks])
+        ok("PROCESS", f"Embedded ({len(embeds)} vectors)")
+
+        step("PROCESS", "Storing vectors in Supabase…")
         store_chunks(doc_id, user_id, chunks, embeds, expires)
+
         supa.table("documents").update({"status": "ready", "chunk_count": len(chunks)}) \
             .eq("id", doc_id).execute()
+
+        elapsed = time.perf_counter() - t0
+        ok("PROCESS", f"Document ready · {len(chunks)} chunks · {elapsed:.2f}s [{short_id}]")
+
     except Exception as e:
+        error("PROCESS", f"{filename} [{short_id}] — {e}")
         supa.table("documents").update({"status": "error", "error": str(e)}) \
             .eq("id", doc_id).execute()

@@ -465,6 +501,7 @@ async def llm_switch(req: LLMSwitchRequest, bg: BackgroundTasks):
     if get_model_name() == req.model and is_llm_ready():
        return {"ok": True, "switching": False, "model": req.model, "msg": "Already active"}
 
+    step("SWITCH", f"Admin requested {get_model_name()} → {req.model}")
     bg.add_task(_do_switch_model, req.model)
     return {"ok": True, "switching": True, "model": req.model}
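With the ANSI colour codes stripped, the new lifespan hook should emit a boot sequence roughly like the sketch below. Timestamps, durations, and box/status glyphs are illustrative (the exact formats come from model/log.py later in this commit), and the model names are the defaults referenced above:

    ╔══════════════════════════════════════════════════════════╗
    ║           LUMINARY BACKEND · 2025-06-01 12:00:00 UTC     ║
    ╚══════════════════════════════════════════════════════════╝

    12:00:00 [STARTUP] Embedding model ──────────────────────────
    12:00:00 [STARTUP] → Loading BAAI/bge-small-en-v1.5…
    12:00:04 [STARTUP] ✓ Embedding model ready (3.8s)
    12:00:04 [STARTUP] LLM ──────────────────────────────────────
    12:00:04 [STARTUP] → Loading HuggingFaceTB/SmolLM2-360M-Instruct in background thread…
    12:00:27 [STARTUP] ✓ LLM ready — HuggingFaceTB/SmolLM2-360M-Instruct
    12:00:27 [STARTUP] All systems go ───────────────────────────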
model/loader.py
CHANGED

@@ -16,6 +16,7 @@ Model options (set LLM_MODEL env var in HF Space to switch, no redeploy needed):
 
 Note:
 - EXAONE requires trust_remote_code=True (LG AI custom architecture).
+  Requires transformers>=4.46.0 for RopeParameters support.
 - Llama 3.2 and Gemma 3 may require a HF_TOKEN env var (gated models).
 - Qwen3 supports /think and /no_think prefixes for reasoning depth control.
 """

@@ -25,6 +26,7 @@ import time
 import threading
 import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM
+from model.log import section, step, ok, warn, error
 
 MODEL_ID = os.environ.get("LLM_MODEL", "HuggingFaceTB/SmolLM2-360M-Instruct")
 

@@ -49,35 +51,63 @@ _switch_lock = threading.Lock()
 
 def _load() -> None:
     global _tokenizer, _llm, _llm_ready, _loading_msg
+
     if _llm is not None:
         return
 
-    t0 …
-    sep = "-" * 60
-    print(f"\n{sep}", flush=True)
-    print(f"  Loading {MODEL_ID}", flush=True)
-    print(f"  First boot downloads model weights then caches to disk.", flush=True)
-    print(f"{sep}\n", flush=True)
-    … (3 removed lines, content not captured)
-
+    t0 = time.perf_counter()
     _trc = _needs_trust_remote_code(MODEL_ID)
 
+    section("MODEL", f"Loading {MODEL_ID}")
+
+    if _trc:
+        step("MODEL", "trust_remote_code=True (custom architecture)")
+
+    # ── Tokenizer ──────────────────────────────────────────────────────────
+    _loading_msg = f"Loading tokenizer…"
+    step("MODEL", f"Fetching tokenizer…")
+    try:
+        _tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=_trc)
+        ok("MODEL", "Tokenizer loaded")
+    except Exception as exc:
+        error("MODEL", f"Tokenizer load failed — {exc}")
+        raise
+
-    _loading_msg = f"Loading model weights for {MODEL_ID}… (may download on first run)"
-    _llm = AutoModelForCausalLM.from_pretrained(
-        MODEL_ID,
-        torch_dtype=torch.bfloat16,
-        trust_remote_code=_trc,
-    )
+    # ── Weights ────────────────────────────────────────────────────────────
+    _loading_msg = "Loading model weights… (downloads on first run, then cached)"
+    step("MODEL", "Loading weights (first run will download → subsequent boots use cache)")
+
+    device_info = "CUDA" if torch.cuda.is_available() else "CPU"
+    step("MODEL", f"Device: {device_info} · dtype: bfloat16")
+
+    try:
+        _llm = AutoModelForCausalLM.from_pretrained(
+            MODEL_ID,
+            torch_dtype=torch.bfloat16,
+            trust_remote_code=_trc,
+        )
+    except ImportError as exc:
+        _hint = ""
+        if "RopeParameters" in str(exc):
+            _hint = (
+                "\n Hint: EXAONE-3.5 requires transformers>=4.46.0.\n"
+                " Ensure requirements.txt contains transformers>=4.46.0\n"
+                " and rebuild/restart the Space."
+            )
+        error("MODEL", f"{exc}{_hint}")
+        raise
+    except Exception as exc:
+        error("MODEL", str(exc))
+        raise
 
     _llm.eval()
     _llm_ready = True
     _loading_msg = ""
-    … (4 removed lines, content not captured)
+
+    elapsed = time.perf_counter() - t0
+    params = sum(p.numel() for p in _llm.parameters()) / 1e6
+    ok("MODEL", f"Ready · {params:.0f}M params · {elapsed:.1f}s")
+    section("MODEL", "Model online")
 
 
 def get_tokenizer() -> AutoTokenizer:

@@ -116,32 +146,41 @@ def switch_model(new_model_id: str) -> None:
     global _loading, _loading_msg, _loading_error
 
     with _switch_lock:
-        … (1 removed line, content not captured)
+        prev = MODEL_ID
+        section("SWITCH", f"{prev} → {new_model_id}")
+
+        _loading = True
         _loading_error = None
-        _loading_msg …
-        _llm_ready …
+        _loading_msg = f"Unloading {prev}…"
+        _llm_ready = False
 
-        # Release model from memory
+        # ── Release current model from memory ──────────────────────────────
+        step("SWITCH", f"Unloading {prev}")
         try:
             import gc
-            _llm …
+            _llm = None
             _tokenizer = None
             gc.collect()
             if torch.cuda.is_available():
                 torch.cuda.empty_cache()
-            … (2 removed lines, content not captured)
+                step("SWITCH", "CUDA cache cleared")
+            ok("SWITCH", "Memory freed")
+        except Exception as exc:
+            warn("SWITCH", f"Cleanup warning: {exc}")
 
         MODEL_ID = new_model_id
+        step("SWITCH", f"Starting load of {new_model_id}")
 
         try:
-            _load()  # uses updated …
+            _load()  # uses updated MODEL_ID; sets _llm_ready = True
             _loading = False
+            ok("SWITCH", f"Switch complete — {new_model_id}")
         except Exception as exc:
             _loading_error = str(exc)
-            _loading …
-            _loading_msg …
+            _loading = False
+            _loading_msg = ""
+            error("SWITCH", f"Failed to load {new_model_id}\n {exc}")
 
 
 def is_llm_ready() -> bool:
-    return _llm_ready
+    return _llm_ready
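The new ImportError branch above turns an opaque RopeParameters failure into an actionable hint at load time. If you would rather fail fast before any model code runs at all, a minimal boot-time guard could enforce the same version floor up front. This sketch is not part of the commit; it assumes the `packaging` package is available (transformers already depends on it):

    # Hypothetical boot-time guard, not in this commit: enforce the
    # transformers>=4.46.0 floor that EXAONE's RopeParameters needs.
    from importlib.metadata import version   # stdlib, Python 3.8+
    from packaging.version import Version    # installed alongside transformers

    if Version(version("transformers")) < Version("4.46.0"):
        raise RuntimeError(
            "transformers>=4.46.0 is required (RopeParameters support); "
            "update requirements.txt and rebuild the Space."
        )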
model/log.py
ADDED

@@ -0,0 +1,154 @@
+"""
+Shared structured logger for the Luminary HF backend.
+Outputs readable, sectioned logs that are easy to scan in the HF Space container view.
+"""
+
+from __future__ import annotations
+import logging
+import sys
+import time
+from datetime import datetime, timezone
+
+# ── ANSI colour palette ──────────────────────────────────────────────────────
+_R   = "\033[0m"   # reset
+_B   = "\033[1m"   # bold
+_DIM = "\033[2m"   # dim
+_GRN = "\033[32m"  # green
+_CYN = "\033[36m"  # cyan
+_YLW = "\033[33m"  # yellow
+_RED = "\033[31m"  # red
+_MAG = "\033[35m"  # magenta
+_BLU = "\033[34m"  # blue
+_WHT = "\033[97m"  # bright white
+
+_TAG_COLORS: dict[str, str] = {
+    "STARTUP": _CYN,
+    "MODEL": _MAG,
+    "UPLOAD": _BLU,
+    "PROCESS": _BLU,
+    "CHAT": _GRN,
+    "QUIZ": _GRN,
+    "SWITCH": _YLW,
+    "ERROR": _RED,
+    "HEALTH": _DIM,
+}
+
+
+class _FmtHandler(logging.StreamHandler):
+    """Formatter that wraps log records into readable tag-prefixed lines."""
+
+    def emit(self, record: logging.LogRecord) -> None:
+        try:
+            tag = getattr(record, "tag", record.levelname)
+            msg = record.getMessage()
+            color = _TAG_COLORS.get(tag, _WHT)
+            ts = datetime.now(timezone.utc).strftime("%H:%M:%S")
+            prefix = f"{_DIM}{ts}{_R} {color}{_B}[{tag}]{_R}"
+            # indent continuation lines
+            lines = msg.splitlines()
+            out = prefix + " " + lines[0]
+            for line in lines[1:]:
+                out += "\n" + (" " * (len(ts) + len(tag) + 5)) + line
+            sys.stdout.write(out + "\n")
+            sys.stdout.flush()
+        except Exception:
+            self.handleError(record)
+
+
+# ── Module-level logger setup ─────────────────────────────────────────────────
+_handler = _FmtHandler()
+_handler.setFormatter(logging.Formatter("%(message)s"))
+
+log = logging.getLogger("luminary")
+log.setLevel(logging.DEBUG)
+if not log.handlers:
+    log.addHandler(_handler)
+log.propagate = False
+
+
+# ── Convenience helpers ───────────────────────────────────────────────────────
+
+def _tag(tag: str) -> dict:
+    return {"extra": {"tag": tag}}
+
+
+def banner(title: str, width: int = 58) -> None:
+    """Print a prominent box banner (e.g. at startup)."""
+    bar = "═" * width
+    inner = title.center(width)
+    sys.stdout.write(
+        f"\n{_B}{_CYN}╔{bar}╗\n"
+        f"║{_WHT}{_B}{inner}{_CYN}║\n"
+        f"╚{bar}╝{_R}\n\n"
+    )
+    sys.stdout.flush()
+
+
+def section(tag: str, msg: str) -> None:
+    """Print a thin divider line with an annotation."""
+    color = _TAG_COLORS.get(tag, _WHT)
+    ts = datetime.now(timezone.utc).strftime("%H:%M:%S")
+    width = max(0, 58 - len(tag) - len(msg) - 4)
+    bar = "─" * width
+    sys.stdout.write(f"{_DIM}{ts}{_R} {color}{_B}[{tag}]{_R} {_DIM}{msg} {bar}{_R}\n")
+    sys.stdout.flush()
+
+
+def ok(tag: str, msg: str) -> None:
+    color = _TAG_COLORS.get(tag, _WHT)
+    ts = datetime.now(timezone.utc).strftime("%H:%M:%S")
+    sys.stdout.write(f"{_DIM}{ts}{_R} {color}{_B}[{tag}]{_R} {_GRN}✓{_R} {msg}\n")
+    sys.stdout.flush()
+
+
+def step(tag: str, msg: str) -> None:
+    color = _TAG_COLORS.get(tag, _WHT)
+    ts = datetime.now(timezone.utc).strftime("%H:%M:%S")
+    sys.stdout.write(f"{_DIM}{ts}{_R} {color}{_B}[{tag}]{_R} {_DIM}→{_R} {msg}\n")
+    sys.stdout.flush()
+
+
+def warn(tag: str, msg: str) -> None:
+    color = _TAG_COLORS.get(tag, _WHT)
+    ts = datetime.now(timezone.utc).strftime("%H:%M:%S")
+    sys.stdout.write(f"{_DIM}{ts}{_R} {color}{_B}[{tag}]{_R} {_YLW}⚠ {_R} {_YLW}{msg}{_R}\n")
+    sys.stdout.flush()
+
+
+def error(tag: str, msg: str) -> None:
+    ts = datetime.now(timezone.utc).strftime("%H:%M:%S")
+    sys.stdout.write(
+        f"{_DIM}{ts}{_R} {_RED}{_B}[{tag}]{_R} "
+        f"{_RED}✗ Error{_R}\n"
+        f"{' ' * (len(ts) + len(tag) + 5)}{_RED}{msg}{_R}\n"
+    )
+    sys.stdout.flush()
+
+
+class Timer:
+    """Context manager / manual stopwatch with labelled output."""
+    def __init__(self, tag: str, label: str) -> None:
+        self.tag = tag
+        self.label = label
+        self._t0: float = 0.0
+
+    def start(self) -> "Timer":
+        self._t0 = time.perf_counter()
+        return self
+
+    def elapsed(self) -> float:
+        return time.perf_counter() - self._t0
+
+    def done(self, extra: str = "") -> float:
+        secs = self.elapsed()
+        msg = f"{self.label} {_DIM}({secs:.2f}s){_R}"
+        if extra:
+            msg += f" {_DIM}{extra}{_R}"
+        ok(self.tag, msg)
+        return secs
+
+    def __enter__(self) -> "Timer":
+        return self.start()
+
+    def __exit__(self, *_) -> None:
+        self.done()
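Taken together, the helpers are used like this (a usage sketch with hypothetical tags and labels, mirroring the call sites in app.py and model/loader.py above). Note the helpers write straight to stdout; the module-level `luminary` logger with `_FmtHandler` is available separately for ordinary logging calls:

    from model.log import banner, section, step, ok, warn, error, Timer

    banner("LUMINARY BACKEND")                    # boxed startup banner
    section("PROCESS", "demo.pdf [a1b2c3d4]")     # thin annotated divider
    step("PROCESS", "Embedding 12 chunks…")       # in-progress marker
    with Timer("PROCESS", "Embedded 12 chunks"):  # prints an ok() line with elapsed time on exit
        pass                                      # ...work happens here...
    warn("PROCESS", "Storage near full")          # yellow warning line
    error("PROCESS", "demo.pdf [a1b2c3d4] — parse failed")  # red two-line error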
requirements.txt
CHANGED

@@ -1,6 +1,7 @@
 fastapi
 uvicorn[standard]==0.34.0
 sentence-transformers==4.1.0
+transformers>=4.46.0
 huggingface-hub>=0.31.0
 supabase==2.13.0
 pymupdf==1.25.3