Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -47,6 +47,7 @@ MAX_TOKENS = _int_env("MAX_TOKENS", 60) # 生成 token 上
|
|
| 47 |
FEW_SHOT_EXAMPLES_COUNT = _int_env("FEW_SHOT", 0 if DEVICE == "cpu" else 1)
|
| 48 |
ENABLE_INDEX = str(os.getenv("ENABLE_INDEX", "0" if DEVICE == "cpu" else "1")).lower() in {"1", "true", "yes", "y"}
|
| 49 |
EMBED_BATCH = _int_env("EMBED_BATCH", 8 if DEVICE == "cpu" else 16)
|
|
|
|
| 50 |
|
| 51 |
# 使用 /tmp 作為暫存目錄
|
| 52 |
TEMP_DIR = "/tmp/text_to_sql_cache"
|
|
@@ -58,6 +59,7 @@ print(f"數據集: {DATASET_REPO_ID}")
|
|
| 58 |
print(f"嵌入模型: {EMBED_MODEL_NAME}")
|
| 59 |
print(f"設備: {DEVICE} (USE_GPU={USE_GPU}, N_GPU_LAYERS={N_GPU_LAYERS})")
|
| 60 |
print(f"THREADS={THREADS}, CTX={CTX}, MAX_TOKENS={MAX_TOKENS}, FEW_SHOT={FEW_SHOT_EXAMPLES_COUNT}, ENABLE_INDEX={ENABLE_INDEX}, EMBED_BATCH={EMBED_BATCH}")
|
|
|
|
| 61 |
print(f"暫存目錄: {TEMP_DIR}")
|
| 62 |
print("=" * 60)
|
| 63 |
|
|
@@ -203,7 +205,7 @@ class TextToSQLSystem:
|
|
| 203 |
model_path=model_path,
|
| 204 |
n_ctx=CTX, # 上下文長度(CPU 默認更小)
|
| 205 |
n_threads=THREADS, # 使用多執行緒
|
| 206 |
-
n_batch=
|
| 207 |
verbose=False,
|
| 208 |
n_gpu_layers=ngl, # 可選 GPU 加速
|
| 209 |
use_mmap=True, # 使用內存映射減少內存占用
|
|
@@ -254,6 +256,7 @@ class TextToSQLSystem:
|
|
| 254 |
# 清理垃圾收集
|
| 255 |
gc.collect()
|
| 256 |
|
|
|
|
| 257 |
output = self.llm(
|
| 258 |
prompt,
|
| 259 |
max_tokens=MAX_TOKENS, # 生成長度可配置
|
|
@@ -262,6 +265,8 @@ class TextToSQLSystem:
|
|
| 262 |
echo=False,
|
| 263 |
stop=["```", ";", "\n\n", "</s>"],
|
| 264 |
)
|
|
|
|
|
|
|
| 265 |
|
| 266 |
self._log(f"模型原始輸出: {str(output)[:200]}...", "DEBUG")
|
| 267 |
|
|
|
|
# Few-shot prompting: disabled on CPU (keeps prompts short/fast), one example on GPU.
FEW_SHOT_EXAMPLES_COUNT = _int_env("FEW_SHOT", 0 if DEVICE == "cpu" else 1)

# Vector-index construction is opt-in; enabled by default only when a GPU is available.
# NOTE: os.getenv() already returns a str here because a default is supplied,
# so the previous str(...) wrapper was redundant and has been removed.
ENABLE_INDEX = os.getenv("ENABLE_INDEX", "0" if DEVICE == "cpu" else "1").lower() in {"1", "true", "yes", "y"}

# Embedding batch size: smaller on CPU to bound memory use.
EMBED_BATCH = _int_env("EMBED_BATCH", 8 if DEVICE == "cpu" else 16)

# llama.cpp prompt-evaluation batch size (passed as n_batch), overridable via env.
N_BATCH = _int_env("N_BATCH", 128 if DEVICE == "cpu" else 256)

# Use /tmp as the scratch/cache directory
TEMP_DIR = "/tmp/text_to_sql_cache"
|
|
|
|
# Startup banner: echo the effective runtime configuration so misconfigured
# environment variables are immediately visible in the Space logs.
_banner = [
    f"嵌入模型: {EMBED_MODEL_NAME}",
    f"設備: {DEVICE} (USE_GPU={USE_GPU}, N_GPU_LAYERS={N_GPU_LAYERS})",
    f"THREADS={THREADS}, CTX={CTX}, MAX_TOKENS={MAX_TOKENS}, FEW_SHOT={FEW_SHOT_EXAMPLES_COUNT}, ENABLE_INDEX={ENABLE_INDEX}, EMBED_BATCH={EMBED_BATCH}",
    f"N_BATCH={N_BATCH}",
    f"暫存目錄: {TEMP_DIR}",
    "=" * 60,
]
print("\n".join(_banner))
|
|
|
|
| 205 |
model_path=model_path,
|
| 206 |
n_ctx=CTX, # 上下文長度(CPU 默認更小)
|
| 207 |
n_threads=THREADS, # 使用多執行緒
|
| 208 |
+
n_batch=N_BATCH, # 批處理大小(可配置)
|
| 209 |
verbose=False,
|
| 210 |
n_gpu_layers=ngl, # 可選 GPU 加速
|
| 211 |
use_mmap=True, # 使用內存映射減少內存占用
|
|
|
|
| 256 |
# 清理垃圾收集
|
| 257 |
gc.collect()
|
| 258 |
|
| 259 |
+
start_ts = datetime.now()
|
| 260 |
output = self.llm(
|
| 261 |
prompt,
|
| 262 |
max_tokens=MAX_TOKENS, # 生成長度可配置
|
|
|
|
| 265 |
echo=False,
|
| 266 |
stop=["```", ";", "\n\n", "</s>"],
|
| 267 |
)
|
| 268 |
+
elapsed = (datetime.now() - start_ts).total_seconds()
|
| 269 |
+
self._log(f"推論耗時: {elapsed:.2f}s", "DEBUG")
|
| 270 |
|
| 271 |
self._log(f"模型原始輸出: {str(output)[:200]}...", "DEBUG")
|
| 272 |
|