cwadayi committed
Commit 58fd6fa · verified · 1 Parent(s): a1ff4ea

Update ai_service.py

Files changed (1)
  1. ai_service.py +29 -33
ai_service.py CHANGED
@@ -1,31 +1,25 @@
 # ai_service.py
 import torch
-from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
-from config import (
-    LLM_MODEL, LLM_MAX_NEW_TOKENS,
-    LLM_TOP_K, LLM_TEMPERATURE
-)
+from transformers import T5Tokenizer, T5ForConditionalGeneration
+from config import LLM_MODEL, LLM_MAX_NEW_TOKENS, LLM_TEMPERATURE, LLM_TOP_K
 
-# Dictionary for lazily loading the language model
-_LLM = {"loaded": False, "ok": False, "err": None, "model": None}
+_LLM = {"loaded": False, "ok": False, "err": None, "model": None, "tokenizer": None, "device": "cpu"}
 
 def _ensure_llm():
-    """Load the AI model on first use."""
+    """Load the Flan-T5 model and tokenizer on first use."""
     if _LLM["loaded"]:
         return _LLM["ok"], _LLM["err"]
     _LLM["loaded"] = True
 
     try:
         device = "cuda" if torch.cuda.is_available() else "cpu"
-        tokenizer = AutoTokenizer.from_pretrained(LLM_MODEL)
-        model = AutoModelForCausalLM.from_pretrained(LLM_MODEL).to(device)
-        pipe = pipeline(
-            "text-generation",
-            model=model,
-            tokenizer=tokenizer,
-            device=device
-        )
-        _LLM.update({"ok": True, "model": pipe})
+
+        # Load the T5-specific tokenizer and model
+        tokenizer = T5Tokenizer.from_pretrained(LLM_MODEL)
+        model = T5ForConditionalGeneration.from_pretrained(LLM_MODEL).to(device)
+
+        _LLM.update({"ok": True, "model": model, "tokenizer": tokenizer, "device": device})
+        print(f"Flan-T5 model '{LLM_MODEL}' loaded successfully on {device}.")
         return True, None
     except Exception as e:
         _LLM["err"] = f"{e}"
@@ -33,29 +27,31 @@ def _ensure_llm():
         return False, _LLM["err"]
 
 def generate_ai_text(user_prompt: str) -> str:
-    """Generate a text response using the loaded AI model."""
+    """Generate a text response using the loaded Flan-T5 model."""
    ok, err = _ensure_llm()
     if not ok:
         return f"🤖 AI 模型無法使用。\n詳細錯誤:{err}"
 
-    pipe = _LLM["model"]
-
-    # [Changed] Give the bloomz model a clearer instruction
-    prompt = f"你是一個多功能的台灣在地LINE助理,請用繁體中文簡潔有力地回答以下問題。\n問題:{user_prompt}\n回答:"
+    tokenizer = _LLM["tokenizer"]
+    model = _LLM["model"]
+    device = _LLM["device"]
+
+    # Build a generic question-answering instruction for Flan-T5
+    input_text = f"請用繁體中文回答以下問題: {user_prompt}"
 
     try:
-        outputs = pipe(
-            prompt,
-            max_new_tokens=LLM_MAX_NEW_TOKENS,
-            do_sample=True,
-            temperature=LLM_TEMPERATURE,
-            top_k=LLM_TOP_K,
-        )
-        response = outputs[0]["generated_text"]
+        input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to(device)
+
+        with torch.no_grad():
+            outputs = model.generate(
+                input_ids,
+                max_new_tokens=LLM_MAX_NEW_TOKENS,
+                do_sample=True,
+                temperature=LLM_TEMPERATURE,
+                top_k=LLM_TOP_K
+            )
 
-        if prompt in response:
-            response = response.split(prompt, 1)[-1]
-
+        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
         return response.strip() or "(AI 沒有產生任何內容)"
     except Exception as e:
         return f"AI 產生內容時發生錯誤:{e}"