cwadayi committed
Commit eef0d6d · verified · 1 Parent(s): 285d51d

Update ai_service.py

Files changed (1)
  1. ai_service.py +3 -10
ai_service.py CHANGED
@@ -16,22 +16,15 @@ def _ensure_llm():
     _LLM["loaded"] = True
 
     try:
-        # [Modified] Use a more robust loading approach
-        # 1. Decide the device (GPU or CPU)
         device = "cuda" if torch.cuda.is_available() else "cpu"
-
-        # 2. Load the tokenizer and model separately
         tokenizer = AutoTokenizer.from_pretrained(LLM_MODEL)
         model = AutoModelForCausalLM.from_pretrained(LLM_MODEL).to(device)
-
-        # 3. Pass the loaded tokenizer and model into the pipeline
         pipe = pipeline(
             "text-generation",
             model=model,
             tokenizer=tokenizer,
             device=device
         )
-
         _LLM.update({"ok": True, "model": pipe})
         return True, None
     except Exception as e:
@@ -46,7 +39,9 @@ def generate_ai_text(user_prompt: str) -> str:
         return f"🤖 AI 模型無法使用。\n詳細錯誤:{err}"
 
     pipe = _LLM["model"]
-    prompt = user_prompt
+
+    # [Modified] Give the bloomz model a clearer instruction
+    prompt = f"你是一個多功能的台灣在地LINE助理,請用繁體中文簡潔有力地回答以下問題。\n問題:{user_prompt}\n回答:"
 
     try:
         outputs = pipe(
@@ -56,10 +51,8 @@ def generate_ai_text(user_prompt: str) -> str:
             temperature=LLM_TEMPERATURE,
             top_k=LLM_TOP_K,
         )
-        # Parse the model-generated portion from the pipeline output
         response = outputs[0]["generated_text"]
 
-        # Remove the original prompt to get a clean response
         if prompt in response:
             response = response.split(prompt, 1)[-1]
 
 
 
16
  _LLM["loaded"] = True
17
 
18
  try:
 
 
19
  device = "cuda" if torch.cuda.is_available() else "cpu"
 
 
20
  tokenizer = AutoTokenizer.from_pretrained(LLM_MODEL)
21
  model = AutoModelForCausalLM.from_pretrained(LLM_MODEL).to(device)
 
 
22
  pipe = pipeline(
23
  "text-generation",
24
  model=model,
25
  tokenizer=tokenizer,
26
  device=device
27
  )
 
28
  _LLM.update({"ok": True, "model": pipe})
29
  return True, None
30
  except Exception as e:
 
39
  return f"🤖 AI 模型無法使用。\n詳細錯誤:{err}"
40
 
41
  pipe = _LLM["model"]
42
+
43
+ # [修改] 給予 bloomz 模型一個更清晰的指令
44
+ prompt = f"你是一個多功能的台灣在地LINE助理,請用繁體中文簡潔有力地回答以下問題。\n問題:{user_prompt}\n回答:"
45
 
46
  try:
47
  outputs = pipe(
 
51
  temperature=LLM_TEMPERATURE,
52
  top_k=LLM_TOP_K,
53
  )
 
54
  response = outputs[0]["generated_text"]
55
 
 
56
  if prompt in response:
57
  response = response.split(prompt, 1)[-1]
58
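Note on the prompt change: generate_ai_text no longer sends the raw user text to the model. It wraps it in a fixed Traditional Chinese instruction (roughly: "You are a multi-purpose, Taiwan-local LINE assistant; answer the following question concisely in Traditional Chinese"), then strips that instruction back out, since text-generation pipelines echo the prompt at the start of generated_text. A rough sketch of that flow, reusing the pipe object from the sketch above; the generation arguments shown are illustrative stand-ins for the LLM_TEMPERATURE / LLM_TOP_K settings (and any others) that ai_service.py actually passes.

def generate_ai_text(user_prompt: str) -> str:
    # Wrap the raw user text in a fixed instruction so bloomz answers as a
    # Taiwan-local LINE assistant, in Traditional Chinese.
    prompt = (
        "你是一個多功能的台灣在地LINE助理,"
        "請用繁體中文簡潔有力地回答以下問題。\n"
        f"問題:{user_prompt}\n回答:"
    )

    # Illustrative sampling settings; the real code uses LLM_TEMPERATURE / LLM_TOP_K.
    outputs = pipe(prompt, max_new_tokens=128, do_sample=True, temperature=0.7, top_k=50)
    response = outputs[0]["generated_text"]

    # The pipeline echoes the prompt, so keep only the text generated after it.
    if prompt in response:
        response = response.split(prompt, 1)[-1]
    return response.strip()

With this sketch, a call such as generate_ai_text("明天台北會下雨嗎?") would return only the model's answer, with the instruction scaffold removed.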