Datangtang committed
Commit a3cfd53 · verified · 1 Parent(s): d658b72

Fix bug: runtime error

Files changed (1)
  1. app.py +33 -39
app.py CHANGED
@@ -11,7 +11,7 @@ current_model_name = None
 
 MODEL_CONFIGS = {
     "1B Model (Datangtang/GGUF1B)": {
-        "repo_id": "Datangtang/GGUF1B",
+        "repo_id": "Datangtang/GFUF1B",
         "filename": "llama-3.2-1b-instruct.Q4_K_M.gguf"
     },
     "3B Model (Datangtang/GGUF3B)": {
@@ -25,13 +25,13 @@ MODEL_CONFIGS = {
 # Load model function
 # ----------------------------------------
 def load_model(model_choice):
-    global loaded_models, current_model_name
-
     if model_choice in loaded_models:
+        print(f"Reusing already loaded model: {model_choice}")
         return loaded_models[model_choice]
 
     cfg = MODEL_CONFIGS[model_choice]
 
+    print(f"Downloading model: {model_choice}")
     model_path = hf_hub_download(
         repo_id=cfg["repo_id"],
         filename=cfg["filename"],
@@ -39,6 +39,7 @@ def load_model(model_choice):
         token=os.environ["HF_TOKEN"]
     )
 
+    print("Loading model into memory...")
     llm = Llama(
         model_path=model_path,
         n_ctx=1024,
@@ -47,36 +48,30 @@ def load_model(model_choice):
         n_gpu_layers=0,
         use_mmap=True,
         use_mlock=True,
-        verbose=False,
+        verbose=False
     )
 
     loaded_models[model_choice] = llm
-    current_model_name = model_choice
+    print("Model loaded successfully!")
     return llm
 
 
 # ----------------------------------------
-# Chat function (Gradio 4.x message format)
+# Chat function (HuggingFace-compatible)
 # ----------------------------------------
-def chat(messages, model_choice):
-
+def chat(message, history, model_choice):
     llm = load_model(model_choice)
 
-    # Construct conversation
+    # Build conversation prompt
     conversation = "System: You are a helpful assistant.\n"
 
-    for msg in messages[-3:]:
-        role = msg["role"]
-        text = msg["content"]
+    for human, assistant in history[-3:]:
+        conversation += f"User: {human}\n"
+        if assistant:
+            conversation += f"Assistant: {assistant}\n"
 
-        if role == "user":
-            conversation += f"User: {text}\n"
-        elif role == "assistant":
-            conversation += f"Assistant: {text}\n"
+    conversation += f"User: {message}\nAssistant:"
 
-    conversation += "Assistant:"
-
-    # LLM output
     response = llm(
         conversation,
         max_tokens=128,
@@ -91,37 +86,36 @@ def chat(messages, model_choice):
 
 
 # ----------------------------------------
-# Gradio UI (Gradio 4.x messages format)
+# Gradio UI
 # ----------------------------------------
 with gr.Blocks() as demo:
 
-    gr.Markdown("# 🦙 Datangtang GGUF Model Demo (Gradio 4.x Compatible)")
+    gr.Markdown("## 🦙 Datangtang GGUF Model Demo")
 
     model_choice = gr.Dropdown(
         label="Select Model",
         choices=list(MODEL_CONFIGS.keys()),
-        value="1B Model (Datangtang/GGUF1B)",
+        value="1B Model (Datangtang/GGUF1B)"
     )
 
-    chatbot = gr.Chatbot(label="Chat", type="messages")
+    chatbot = gr.Chatbot()
     msg_box = gr.Textbox(label="Message")
 
-    # User sends message
-    def add_user_message(user_msg, messages):
-        messages = messages + [{"role": "user", "content": user_msg}]
-        return messages, ""
-
-    # Bot replies
-    def add_bot_reply(messages, model_choice):
-        reply = chat(messages, model_choice)
-        messages = messages + [{"role": "assistant", "content": reply}]
-        return messages
-
-    msg_box.submit(
-        add_user_message, [msg_box, chatbot], [chatbot, msg_box]
-    ).then(
-        add_bot_reply, [chatbot, model_choice], chatbot
+    # Add user message to history
+    def user_send(message, history):
+        history = history + [[message, None]]
+        return history, ""
+
+    # Generate bot response
+    def bot_reply(history, model_choice):
+        user_msg = history[-1][0]
+        bot_msg = chat(user_msg, history[:-1], model_choice)
+        history[-1][1] = bot_msg
+        return history
+
+    # Wire events
+    msg_box.submit(user_send, [msg_box, chatbot], [chatbot, msg_box]).then(
+        bot_reply, [chatbot, model_choice], chatbot
    )
 
-    demo.launch()
+    demo.launch()
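
Note: the reworked chat() consumes Gradio's default tuple-format history ([[user, assistant], ...]) rather than the 4.x message-dict format. A minimal standalone sketch of the prompt assembly it performs, assuming only that behavior; build_prompt is a hypothetical helper name (app.py does this inline), sample values are illustrative, and no llama_cpp call is made:

# Sketch: how the new chat() turns tuple-format history into a prompt.
# "build_prompt" is a hypothetical helper; app.py performs this inline.
def build_prompt(message, history):
    conversation = "System: You are a helpful assistant.\n"
    for human, assistant in history[-3:]:   # keep only the last 3 exchanges
        conversation += f"User: {human}\n"
        if assistant:
            conversation += f"Assistant: {assistant}\n"
    conversation += f"User: {message}\nAssistant:"
    return conversation

if __name__ == "__main__":
    sample_history = [["Hi there", "Hello! How can I help?"]]
    print(build_prompt("What model is this?", sample_history))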