Atypical281795 committed on
Commit
62d94e1
·
verified ·
1 Parent(s): b10f7cd

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -106
app.py DELETED
@@ -1,106 +0,0 @@
1
- import os
2
- import gradio as gr
3
- import torch
4
- from transformers import AutoModelForCausalLM, AutoTokenizer
5
- from peft import PeftModel
6
-
7
print("=== Application Starting (LoRA Mode) ===")

# Everything below runs inside one top-level try so that any startup failure is
# printed with a clear banner before the exception propagates.
try:
    # 1. Base model: the original checkpoint that the adapter was fine-tuned from.
    BASE_MODEL_ID = "QLU-NLP/BianCang-Qwen2.5-7B"

    # 2. Auto-detect the adapter (fine-tuned weights) path: prefer the uploaded
    #    folder, otherwise fall back to the current directory.
    if os.path.exists("BianCang-Qwen2.5-7B-Instruct_finetuned_model_1"):
        ADAPTER_PATH = "BianCang-Qwen2.5-7B-Instruct_finetuned_model_1"
    else:
        ADAPTER_PATH = "."

    print(f"Base Model: {BASE_MODEL_ID}")
    print(f"Adapter Path: {ADAPTER_PATH}")

    # 3. Load the tokenizer (taken from the base model).
    print("Loading Tokenizer...")
    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_ID, trust_remote_code=True)

    # 4. Load the base model: try automatic device placement in float16 first,
    #    then retry on CPU with default dtype if that fails.
    print("Loading Base Model...")
    try:
        base_model = AutoModelForCausalLM.from_pretrained(
            BASE_MODEL_ID,
            device_map="auto",
            torch_dtype=torch.float16,
            trust_remote_code=True,
        )
    except Exception as e:
        # Deliberate best-effort fallback: any load failure triggers a CPU retry.
        print(f"GPU load failed: {e}. Fallback to CPU.")
        base_model = AutoModelForCausalLM.from_pretrained(
            BASE_MODEL_ID,
            device_map="cpu",
            trust_remote_code=True,
        )

    # 5. Attach the LoRA adapter; if that fails, serve the base model alone.
    print("Loading LoRA Adapter...")
    try:
        model = PeftModel.from_pretrained(base_model, ADAPTER_PATH)
        print("LoRA Adapter loaded successfully!")
    except Exception as e:
        print(f"Failed to load adapter: {e}")
        print("Running with Base Model only as fallback.")
        model = base_model

    def predict(message, history):
        """Generate one assistant reply for the chat UI.

        Args:
            message: The latest user message.
            history: Previous turns. Accepts either (user, assistant) pairs or
                dicts carrying "role" and "content" keys.

        Returns:
            The decoded text of the newly generated tokens only (the prompt
            tokens are stripped before decoding).
        """
        # Build the prompt through the tokenizer's chat template.
        # NOTE: if the fine-tuned model uses a special prompt template,
        # change it here.
        system_prompt = "你是一個專業的中醫藥材知識助手。你具備深厚的中醫理論基礎，特別擅長中藥材的性味、歸經、功效與主治。"

        messages = [{"role": "system", "content": system_prompt}]

        for turn in history or []:
            if isinstance(turn, dict):
                # "messages"-style history entry: forward role/content as-is.
                messages.append({"role": turn["role"], "content": turn["content"]})
                continue
            human, assistant = turn
            # Skip a missing half of a pair instead of feeding None to the template.
            if human is not None:
                messages.append({"role": "user", "content": human})
            if assistant is not None:
                messages.append({"role": "assistant", "content": assistant})

        messages.append({"role": "user", "content": message})

        text = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
        )

        model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

        # Pass the full encoding so the attention mask travels with input_ids
        # instead of leaving generate() to infer it.
        generated_ids = model.generate(
            **model_inputs,
            max_new_tokens=512,
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
        )

        # Drop the prompt tokens so only the new completion is decoded.
        generated_ids = [
            output_ids[len(input_ids):]
            for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
        ]

        response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
        return response

    # Build the Gradio interface (this also exposes an API automatically).
    demo = gr.ChatInterface(
        fn=predict,
        title="BianCang-Qwen2.5-7B TCM Chatbot",
        description="中醫藥材知識微調模型",
    )

    if __name__ == "__main__":
        demo.launch(server_name="0.0.0.0", server_port=7860, show_api=True)

except Exception as e:
    print(f"!!! CRITICAL ERROR ===\n{e}\n======================")
    # Bare raise re-raises the active exception with its original traceback.
    raise