Atypical281795 committed on
Commit
1427692
·
verified ·
1 Parent(s): ddc9235

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +106 -0
  2. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ import torch
4
+ from transformers import AutoModelForCausalLM, AutoTokenizer
5
+ from peft import PeftModel
6
+
7
print("=== Application Starting (LoRA Mode) ===")

try:
    # 1. Base model: the original checkpoint the LoRA adapter was fine-tuned from.
    BASE_MODEL_ID = "QLU-NLP/BianCang-Qwen2.5-7B"

    # 2. Locate the LoRA adapter weights: prefer the uploaded folder when it
    #    exists, otherwise fall back to the current working directory.
    if os.path.exists("BianCang-Qwen2.5-7B-Instruct_finetuned_model_1"):
        ADAPTER_PATH = "BianCang-Qwen2.5-7B-Instruct_finetuned_model_1"
    else:
        ADAPTER_PATH = "."

    print(f"Base Model: {BASE_MODEL_ID}")
    print(f"Adapter Path: {ADAPTER_PATH}")

    # 3. Load the tokenizer (taken from the base model).
    print("Loading Tokenizer...")
    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_ID, trust_remote_code=True)

    # 4. Load the base model: try automatic device placement in float16 first,
    #    and retry on CPU with default settings if that attempt raises.
    print("Loading Base Model...")
    try:
        base_model = AutoModelForCausalLM.from_pretrained(
            BASE_MODEL_ID,
            device_map="auto",
            torch_dtype=torch.float16,
            trust_remote_code=True,
        )
    except Exception as e:
        print(f"GPU load failed: {e}. Fallback to CPU.")
        base_model = AutoModelForCausalLM.from_pretrained(
            BASE_MODEL_ID,
            device_map="cpu",
            trust_remote_code=True,
        )

    # 5. Attach the LoRA adapter. This is best-effort: if the adapter cannot be
    #    loaded, the app keeps serving with the plain base model.
    print("Loading LoRA Adapter...")
    try:
        model = PeftModel.from_pretrained(base_model, ADAPTER_PATH)
        print("LoRA Adapter loaded successfully!")
    except Exception as e:
        print(f"Failed to load adapter: {e}")
        print("Running with Base Model only as fallback.")
        model = base_model

    def _history_to_messages(history):
        """Convert a Gradio chat history into chat-template message dicts.

        Accepts either a list of ``(user, assistant)`` pairs or a list of
        ``{"role": ..., "content": ...}`` dicts. Only user/assistant turns
        whose content is a string are kept, so ``None`` placeholders and
        non-text contents never reach the chat template.
        """
        messages = []
        for entry in history or []:
            if isinstance(entry, dict):
                turns = [(entry.get("role"), entry.get("content"))]
            else:
                human, assistant = entry
                turns = [("user", human), ("assistant", assistant)]
            for role, content in turns:
                if role in ("user", "assistant") and isinstance(content, str):
                    messages.append({"role": role, "content": content})
        return messages

    def predict(message, history):
        """Generate the assistant reply for ``message`` given prior ``history``.

        Args:
            message: The latest user message.
            history: Earlier turns of the conversation, as supplied by
                ``gr.ChatInterface`` (pairs or role/content dicts).

        Returns:
            The decoded text of the newly generated tokens only.
        """
        # Build the prompt through the tokenizer's chat template.
        # NOTE: if the fine-tuned model expects a custom prompt template,
        # adjust it here.
        system_prompt = "你是一個專業的中醫藥材知識助手。你具備深厚的中醫理論基礎,特別擅長中藥材的性味、歸經、功效與主治。"

        messages = [{"role": "system", "content": system_prompt}]
        messages.extend(_history_to_messages(history))
        messages.append({"role": "user", "content": message})

        text = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
        )

        model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

        # Pass the full encoding so the attention mask reaches generate()
        # together with the input ids.
        generated_ids = model.generate(
            **model_inputs,
            max_new_tokens=512,
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
        )

        # generate() returns prompt + completion; keep only the completion.
        prompt_length = model_inputs.input_ids.shape[1]
        new_tokens = generated_ids[0][prompt_length:]
        return tokenizer.decode(new_tokens, skip_special_tokens=True)

    # Build the Gradio chat interface (this also exposes an API automatically).
    demo = gr.ChatInterface(
        fn=predict,
        title="BianCang-Qwen2.5-7B TCM Chatbot",
        description="中醫藥材知識微調模型",
    )

    if __name__ == "__main__":
        demo.launch(server_name="0.0.0.0", server_port=7860, show_api=True)

except Exception as e:
    # Top-level boundary: log the failure, then re-raise so startup still fails.
    print(f"!!! CRITICAL ERROR ===\n{e}\n======================")
    raise
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ transformers>=4.46.0
2
+ accelerate>=0.26.0
3
+ gradio>=4.0.0
4
+ peft>=0.7.0
5
+ scipy