Datangtang committed on
Commit
0beeef4
·
verified ·
1 Parent(s): 3570e52

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -33
app.py CHANGED
@@ -3,6 +3,7 @@ from llama_cpp import Llama
3
  from huggingface_hub import hf_hub_download
4
  import os
5
 
 
6
  # ------------------------------
7
  # Model configuration
8
  # ------------------------------
@@ -17,20 +18,17 @@ MODEL_CONFIGS = {
17
  }
18
  }
19
 
20
- # Model cache
21
- loaded_models = {}
22
 
23
 
24
- # ------------------------------
25
- # Load model safely
26
- # ------------------------------
27
  def load_model(model_name):
28
  if model_name in loaded_models:
 
29
  return loaded_models[model_name]
30
 
31
  cfg = MODEL_CONFIGS[model_name]
32
 
33
- print(f"Downloading {model_name} ...")
34
  model_path = hf_hub_download(
35
  repo_id=cfg["repo_id"],
36
  filename=cfg["filename"],
@@ -38,7 +36,7 @@ def load_model(model_name):
38
  token=os.environ["HF_TOKEN"]
39
  )
40
 
41
- print(f"Loading {model_name} ...")
42
  llm = Llama(
43
  model_path=model_path,
44
  n_ctx=1024,
@@ -47,33 +45,33 @@ def load_model(model_name):
47
  n_gpu_layers=0,
48
  use_mmap=True,
49
  use_mlock=True,
50
- verbose=False
51
  )
52
 
53
  loaded_models[model_name] = llm
 
54
  return llm
55
 
56
 
57
  # ------------------------------
58
- # Chat function
59
  # ------------------------------
60
- def chat_func(message, history, model_name):
61
-
62
  llm = load_model(model_name)
63
 
64
- # ------------------------------
65
- # Build prompt
66
- # ------------------------------
67
  prompt = "System: You are a helpful assistant.\n"
68
 
69
- for user, bot in history[-3:]:
70
- prompt += f"User: {user}\nAssistant: {bot}\n"
 
 
 
 
 
71
 
72
- prompt += f"User: {message}\nAssistant:"
73
 
74
- # ------------------------------
75
- # Model inference
76
- # ------------------------------
77
  output = llm(
78
  prompt,
79
  max_tokens=128,
@@ -84,15 +82,14 @@ def chat_func(message, history, model_name):
84
  stop=["User:", "Assistant:"],
85
  )
86
 
87
- answer = output["choices"][0]["text"]
88
- return answer
89
 
90
 
91
  # ------------------------------
92
  # Gradio UI
93
  # ------------------------------
94
  with gr.Blocks() as demo:
95
-
96
  gr.Markdown("## 🦙 Datangtang Multi-Model GGUF Chat")
97
 
98
  model_selector = gr.Dropdown(
@@ -101,22 +98,26 @@ with gr.Blocks() as demo:
101
  value="1B Model"
102
  )
103
 
104
- chatbot = gr.Chatbot()
105
  msg_box = gr.Textbox(label="Message")
106
 
107
- def user_send(message, history):
108
- history = history + [[message, None]]
109
  return history, ""
110
 
111
- def bot_reply(history, model_name):
112
- user_msg = history[-1][0]
113
- bot_msg = chat_func(user_msg, history[:-1], model_name)
114
- history[-1][1] = bot_msg
115
  return history
116
 
117
- msg_box.submit(user_send, [msg_box, chatbot], [chatbot, msg_box]).then(
118
- bot_reply, [chatbot, model_selector], chatbot
 
 
 
 
 
 
119
  )
120
 
121
-
122
  demo.launch()
 
3
  from huggingface_hub import hf_hub_download
4
  import os
5
 
6
+
7
  # ------------------------------
8
  # Model configuration
9
  # ------------------------------
 
18
  }
19
  }
20
 
21
+ loaded_models = {} # Cache
 
22
 
23
 
 
 
 
24
  def load_model(model_name):
25
  if model_name in loaded_models:
26
+ print(f"Reusing cached model: {model_name}")
27
  return loaded_models[model_name]
28
 
29
  cfg = MODEL_CONFIGS[model_name]
30
 
31
+ print(f"Downloading {model_name}...")
32
  model_path = hf_hub_download(
33
  repo_id=cfg["repo_id"],
34
  filename=cfg["filename"],
 
36
  token=os.environ["HF_TOKEN"]
37
  )
38
 
39
+ print(f"Loading model {model_name}...")
40
  llm = Llama(
41
  model_path=model_path,
42
  n_ctx=1024,
 
45
  n_gpu_layers=0,
46
  use_mmap=True,
47
  use_mlock=True,
48
+ verbose=False,
49
  )
50
 
51
  loaded_models[model_name] = llm
52
+ print(f"Model {model_name} loaded successfully!")
53
  return llm
54
 
55
 
56
  # ------------------------------
57
+ # Chat logic
58
  # ------------------------------
59
+ def generate_reply(history, model_name):
 
60
  llm = load_model(model_name)
61
 
62
+ # Construct prompt with system + chat history
 
 
63
  prompt = "System: You are a helpful assistant.\n"
64
 
65
+ for msg in history:
66
+ role = msg["role"]
67
+ content = msg["content"]
68
+ if role == "user":
69
+ prompt += f"User: {content}\n"
70
+ elif role == "assistant":
71
+ prompt += f"Assistant: {content}\n"
72
 
73
+ prompt += "Assistant:"
74
 
 
 
 
75
  output = llm(
76
  prompt,
77
  max_tokens=128,
 
82
  stop=["User:", "Assistant:"],
83
  )
84
 
85
+ reply = output["choices"][0]["text"]
86
+ return reply.strip()
87
 
88
 
89
  # ------------------------------
90
  # Gradio UI
91
  # ------------------------------
92
  with gr.Blocks() as demo:
 
93
  gr.Markdown("## 🦙 Datangtang Multi-Model GGUF Chat")
94
 
95
  model_selector = gr.Dropdown(
 
98
  value="1B Model"
99
  )
100
 
101
+ chatbot = gr.Chatbot(type="messages")
102
  msg_box = gr.Textbox(label="Message")
103
 
104
+ def user_message(message, history):
105
+ history = history + [{"role": "user", "content": message}]
106
  return history, ""
107
 
108
+ def bot_message(history, model_name):
109
+ reply = generate_reply(history, model_name)
110
+ history = history + [{"role": "assistant", "content": reply}]
 
111
  return history
112
 
113
+ msg_box.submit(
114
+ user_message,
115
+ [msg_box, chatbot],
116
+ [chatbot, msg_box]
117
+ ).then(
118
+ bot_message,
119
+ [chatbot, model_selector],
120
+ chatbot
121
  )
122
 
 
123
  demo.launch()