Datangtang committed on
Commit
3570e52
·
verified ·
1 Parent(s): 258c757

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +60 -66
app.py CHANGED
@@ -3,39 +3,34 @@ from llama_cpp import Llama
3
  from huggingface_hub import hf_hub_download
4
  import os
5
 
6
- # ----------------------------------------
7
- # Global model cache
8
- # ----------------------------------------
9
- loaded_models = {} # Cache loaded Llama models
10
- current_model_name = None
11
-
12
  MODEL_CONFIGS = {
13
- "1B Model (Datangtang/GGUF1B)": {
14
  "repo_id": "Datangtang/GGUF1B",
15
  "filename": "llama-3.2-1b-instruct.Q4_K_M.gguf"
16
  },
17
- "3B Model (Datangtang/GGUF3B)": {
18
- "repo_id": "Datangtang/GGGF3B",
19
  "filename": "llama-3.2-3b-instruct.Q4_K_M.gguf"
20
  }
21
  }
22
 
 
 
23
 
24
- # ----------------------------------------
25
- # Load model function
26
- # ----------------------------------------
27
- def load_model(model_choice):
28
- global loaded_models, current_model_name
29
-
30
- if model_choice in loaded_models:
31
- print(f"Reusing already loaded model: {model_choice}")
32
- current_model_name = model_choice
33
- return loaded_models[model_choice]
34
 
35
- print(f"Downloading model: {model_choice}")
 
 
 
 
 
36
 
37
- cfg = MODEL_CONFIGS[model_choice]
38
 
 
39
  model_path = hf_hub_download(
40
  repo_id=cfg["repo_id"],
41
  filename=cfg["filename"],
@@ -43,9 +38,7 @@ def load_model(model_choice):
43
  token=os.environ["HF_TOKEN"]
44
  )
45
 
46
- print(f"Model downloaded to: {model_path}")
47
- print("Loading GGUF model into memory...")
48
-
49
  llm = Llama(
50
  model_path=model_path,
51
  n_ctx=1024,
@@ -54,75 +47,76 @@ def load_model(model_choice):
54
  n_gpu_layers=0,
55
  use_mmap=True,
56
  use_mlock=True,
57
- verbose=False,
58
  )
59
 
60
- loaded_models[model_choice] = llm
61
- current_model_name = model_choice
62
-
63
- print("Model loaded successfully!")
64
  return llm
65
 
66
 
67
- # ----------------------------------------
68
  # Chat function
69
- # ----------------------------------------
70
- def chat(message, history, model_choice):
71
- llm = load_model(model_choice)
72
 
73
- # System prompt
74
- conversation = "System: You are a helpful assistant.\n"
75
 
76
- # Add last 3 messages
77
- for human, assistant in history[-3:]:
78
- conversation += f"User: {human}\nAssistant: {assistant}\n"
 
79
 
80
- conversation += f"User: {message}\nAssistant:"
 
81
 
82
- response = llm(
83
- conversation,
 
 
 
 
 
84
  max_tokens=128,
85
  temperature=0.7,
86
  top_p=0.9,
87
  top_k=40,
88
  repeat_penalty=1.1,
89
  stop=["User:", "Assistant:"],
90
- echo=False
91
  )
92
 
93
- return response["choices"][0]["text"].strip()
 
94
 
95
 
96
- # ----------------------------------------
97
  # Gradio UI
98
- # ----------------------------------------
99
  with gr.Blocks() as demo:
100
 
101
- gr.Markdown("# 🦙 Datangtang GGUF Model Demo")
102
- gr.Markdown("Switch between **1B** and **3B** GGUF models in real-time.")
103
 
104
- model_choice = gr.Dropdown(
105
- label="Select Model",
106
- choices=list(MODEL_CONFIGS.keys()),
107
- value="1B Model (Datangtang/GGUF1B)",
108
  )
109
 
110
- chat_iface = gr.ChatInterface(
111
- fn=lambda message, history: chat(message, history, model_choice.value),
112
- examples=[
113
- "Explain deep learning in one paragraph.",
114
- "What is the difference between supervised and unsupervised learning?",
115
- "Explain what a transformer model is.",
116
- ],
117
- cache_examples=False,
118
- )
 
 
 
119
 
120
- model_choice.change(
121
- fn=lambda x: f"🔄 Switched to: {x}",
122
- inputs=[model_choice],
123
- outputs=[],
124
  )
125
 
126
 
127
- if __name__ == "__main__":
128
- demo.launch()
 
3
  from huggingface_hub import hf_hub_download
4
  import os
5
 
6
+ # ------------------------------
7
+ # Model configuration
8
+ # ------------------------------
 
 
 
9
  MODEL_CONFIGS = {
10
+ "1B Model": {
11
  "repo_id": "Datangtang/GGUF1B",
12
  "filename": "llama-3.2-1b-instruct.Q4_K_M.gguf"
13
  },
14
+ "3B Model": {
15
+ "repo_id": "Datangtang/GGUF3B",
16
  "filename": "llama-3.2-3b-instruct.Q4_K_M.gguf"
17
  }
18
  }
19
 
20
+ # Model cache
21
+ loaded_models = {}
22
 
 
 
 
 
 
 
 
 
 
 
23
 
24
+ # ------------------------------
25
+ # Load model safely
26
+ # ------------------------------
27
+ def load_model(model_name):
28
+ if model_name in loaded_models:
29
+ return loaded_models[model_name]
30
 
31
+ cfg = MODEL_CONFIGS[model_name]
32
 
33
+ print(f"Downloading {model_name} ...")
34
  model_path = hf_hub_download(
35
  repo_id=cfg["repo_id"],
36
  filename=cfg["filename"],
 
38
  token=os.environ["HF_TOKEN"]
39
  )
40
 
41
+ print(f"Loading {model_name} ...")
 
 
42
  llm = Llama(
43
  model_path=model_path,
44
  n_ctx=1024,
 
47
  n_gpu_layers=0,
48
  use_mmap=True,
49
  use_mlock=True,
50
+ verbose=False
51
  )
52
 
53
+ loaded_models[model_name] = llm
 
 
 
54
  return llm
55
 
56
 
57
+ # ------------------------------
58
  # Chat function
59
+ # ------------------------------
60
+ def chat_func(message, history, model_name):
 
61
 
62
+ llm = load_model(model_name)
 
63
 
64
+ # ------------------------------
65
+ # Build prompt
66
+ # ------------------------------
67
+ prompt = "System: You are a helpful assistant.\n"
68
 
69
+ for user, bot in history[-3:]:
70
+ prompt += f"User: {user}\nAssistant: {bot}\n"
71
 
72
+ prompt += f"User: {message}\nAssistant:"
73
+
74
+ # ------------------------------
75
+ # Model inference
76
+ # ------------------------------
77
+ output = llm(
78
+ prompt,
79
  max_tokens=128,
80
  temperature=0.7,
81
  top_p=0.9,
82
  top_k=40,
83
  repeat_penalty=1.1,
84
  stop=["User:", "Assistant:"],
 
85
  )
86
 
87
+ answer = output["choices"][0]["text"]
88
+ return answer
89
 
90
 
91
+ # ------------------------------
92
  # Gradio UI
93
+ # ------------------------------
94
  with gr.Blocks() as demo:
95
 
96
+ gr.Markdown("## 🦙 Datangtang Multi-Model GGUF Chat")
 
97
 
98
+ model_selector = gr.Dropdown(
99
+ label="Choose model",
100
+ choices=["1B Model", "3B Model"],
101
+ value="1B Model"
102
  )
103
 
104
+ chatbot = gr.Chatbot()
105
+ msg_box = gr.Textbox(label="Message")
106
+
107
+ def user_send(message, history):
108
+ history = history + [[message, None]]
109
+ return history, ""
110
+
111
+ def bot_reply(history, model_name):
112
+ user_msg = history[-1][0]
113
+ bot_msg = chat_func(user_msg, history[:-1], model_name)
114
+ history[-1][1] = bot_msg
115
+ return history
116
 
117
+ msg_box.submit(user_send, [msg_box, chatbot], [chatbot, msg_box]).then(
118
+ bot_reply, [chatbot, model_selector], chatbot
 
 
119
  )
120
 
121
 
122
+ demo.launch()