Paul720810 committed on
Commit
6e5e1b9
·
verified ·
1 Parent(s): 1953ba4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -1
app.py CHANGED
@@ -47,6 +47,7 @@ MAX_TOKENS = _int_env("MAX_TOKENS", 60) # 生成 token 上
47
  FEW_SHOT_EXAMPLES_COUNT = _int_env("FEW_SHOT", 0 if DEVICE == "cpu" else 1)
48
  ENABLE_INDEX = str(os.getenv("ENABLE_INDEX", "0" if DEVICE == "cpu" else "1")).lower() in {"1", "true", "yes", "y"}
49
  EMBED_BATCH = _int_env("EMBED_BATCH", 8 if DEVICE == "cpu" else 16)
 
50
 
51
  # 使用 /tmp 作為暫存目錄
52
  TEMP_DIR = "/tmp/text_to_sql_cache"
@@ -58,6 +59,7 @@ print(f"數據集: {DATASET_REPO_ID}")
58
  print(f"嵌入模型: {EMBED_MODEL_NAME}")
59
  print(f"設備: {DEVICE} (USE_GPU={USE_GPU}, N_GPU_LAYERS={N_GPU_LAYERS})")
60
  print(f"THREADS={THREADS}, CTX={CTX}, MAX_TOKENS={MAX_TOKENS}, FEW_SHOT={FEW_SHOT_EXAMPLES_COUNT}, ENABLE_INDEX={ENABLE_INDEX}, EMBED_BATCH={EMBED_BATCH}")
 
61
  print(f"暫存目錄: {TEMP_DIR}")
62
  print("=" * 60)
63
 
@@ -203,7 +205,7 @@ class TextToSQLSystem:
203
  model_path=model_path,
204
  n_ctx=CTX, # 上下文長度(CPU 默認更小)
205
  n_threads=THREADS, # 使用多執行緒
206
- n_batch=256, # 批處理大小
207
  verbose=False,
208
  n_gpu_layers=ngl, # 可選 GPU 加速
209
  use_mmap=True, # 使用內存映射減少內存占用
@@ -254,6 +256,7 @@ class TextToSQLSystem:
254
  # 清理垃圾收集
255
  gc.collect()
256
 
 
257
  output = self.llm(
258
  prompt,
259
  max_tokens=MAX_TOKENS, # 生成長度可配置
@@ -262,6 +265,8 @@ class TextToSQLSystem:
262
  echo=False,
263
  stop=["```", ";", "\n\n", "</s>"],
264
  )
 
 
265
 
266
  self._log(f"模型原始輸出: {str(output)[:200]}...", "DEBUG")
267
 
 
47
  FEW_SHOT_EXAMPLES_COUNT = _int_env("FEW_SHOT", 0 if DEVICE == "cpu" else 1)
48
  ENABLE_INDEX = str(os.getenv("ENABLE_INDEX", "0" if DEVICE == "cpu" else "1")).lower() in {"1", "true", "yes", "y"}
49
  EMBED_BATCH = _int_env("EMBED_BATCH", 8 if DEVICE == "cpu" else 16)
50
+ N_BATCH = _int_env("N_BATCH", 128 if DEVICE == "cpu" else 256)
51
 
52
  # 使用 /tmp 作為暫存目錄
53
  TEMP_DIR = "/tmp/text_to_sql_cache"
 
59
  print(f"嵌入模型: {EMBED_MODEL_NAME}")
60
  print(f"設備: {DEVICE} (USE_GPU={USE_GPU}, N_GPU_LAYERS={N_GPU_LAYERS})")
61
  print(f"THREADS={THREADS}, CTX={CTX}, MAX_TOKENS={MAX_TOKENS}, FEW_SHOT={FEW_SHOT_EXAMPLES_COUNT}, ENABLE_INDEX={ENABLE_INDEX}, EMBED_BATCH={EMBED_BATCH}")
62
+ print(f"N_BATCH={N_BATCH}")
63
  print(f"暫存目錄: {TEMP_DIR}")
64
  print("=" * 60)
65
 
 
205
  model_path=model_path,
206
  n_ctx=CTX, # 上下文長度(CPU 默認更小)
207
  n_threads=THREADS, # 使用多執行緒
208
+ n_batch=N_BATCH, # 批處理大小(可配置)
209
  verbose=False,
210
  n_gpu_layers=ngl, # 可選 GPU 加速
211
  use_mmap=True, # 使用內存映射減少內存占用
 
256
  # 清理垃圾收集
257
  gc.collect()
258
 
259
+ start_ts = datetime.now()
260
  output = self.llm(
261
  prompt,
262
  max_tokens=MAX_TOKENS, # 生成長度可配置
 
265
  echo=False,
266
  stop=["```", ";", "\n\n", "</s>"],
267
  )
268
+ elapsed = (datetime.now() - start_ts).total_seconds()
269
+ self._log(f"推論耗時: {elapsed:.2f}s", "DEBUG")
270
 
271
  self._log(f"模型原始輸出: {str(output)[:200]}...", "DEBUG")
272