Marcus719 committed on
Commit
39de6aa
·
verified ·
1 Parent(s): 1392118

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +126 -52
app.py CHANGED
@@ -1,70 +1,144 @@
1
  import gradio as gr
2
  from huggingface_hub import InferenceClient
3
 
 
 
 
 
 
4
 
5
- def respond(
6
- message,
7
- history: list[dict[str, str]],
8
- system_message,
9
- max_tokens,
10
- temperature,
11
- top_p,
12
- hf_token: gr.OAuthToken,
13
- ):
14
- """
15
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
16
- """
17
- client = InferenceClient(token=hf_token.token, model="openai/gpt-oss-20b")
18
 
 
 
 
19
  messages = [{"role": "system", "content": system_message}]
20
-
21
- messages.extend(history)
22
-
 
 
 
 
 
 
23
  messages.append({"role": "user", "content": message})
24
-
 
25
  response = ""
26
-
27
- for message in client.chat_completion(
28
  messages,
29
  max_tokens=max_tokens,
30
  stream=True,
31
  temperature=temperature,
32
  top_p=top_p,
33
  ):
34
- choices = message.choices
35
- token = ""
36
- if len(choices) and choices[0].delta.content:
37
- token = choices[0].delta.content
38
-
39
- response += token
40
- yield response
41
-
42
-
43
- """
44
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
45
- """
46
- chatbot = gr.ChatInterface(
47
- respond,
48
- type="messages",
49
- additional_inputs=[
50
- gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
51
- gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
52
- gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
53
- gr.Slider(
54
- minimum=0.1,
55
- maximum=1.0,
56
- value=0.95,
57
- step=0.05,
58
- label="Top-p (nucleus sampling)",
59
- ),
60
- ],
61
- )
62
 
63
- with gr.Blocks() as demo:
64
- with gr.Sidebar():
65
- gr.LoginButton()
66
- chatbot.render()
67
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
 
69
  if __name__ == "__main__":
70
- demo.launch()
 
1
import gradio as gr
from huggingface_hub import InferenceClient

# ============================================
# KTH ID2223 Lab 2 - Llama 3.2 ChatBot
# ============================================
# Use the fine-tuned model (safetensors format).
MODEL_ID = "Marcus719/Llama-3.2-3B-Instruct-Lab2"

# Single module-level client, shared by every chat request in this app.
client = InferenceClient(model=MODEL_ID)
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
def chat(message, history, system_message, max_tokens, temperature, top_p):
    """Stream a reply for *message* via the HuggingFace Inference API.

    Rebuilds the full conversation (system prompt, prior turns, new
    message) and yields the accumulated response text after every
    streamed token.
    """
    # Conversation so far, in chat-completion message format.
    convo = [{"role": "system", "content": system_message}]
    for user_turn, assistant_turn in history:
        if user_turn:
            convo.append({"role": "user", "content": user_turn})
        if assistant_turn:
            convo.append({"role": "assistant", "content": assistant_turn})
    convo.append({"role": "user", "content": message})

    partial = ""
    stream = client.chat_completion(
        convo,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    )
    for chunk in stream:
        # Skip keep-alive / empty-delta chunks.
        delta = chunk.choices[0].delta.content if chunk.choices else None
        if delta:
            partial += delta
            yield partial
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
 
41
# ============================================
# Gradio interface (layout and event wiring)
# ============================================
DEFAULT_SYSTEM_PROMPT = "You are a helpful, respectful and honest assistant."

with gr.Blocks(theme=gr.themes.Soft(), title="🦙 Llama 3.2 ChatBot") as demo:

    gr.Markdown(
        """
        # 🦙 Llama 3.2 3B Instruct ChatBot

        **KTH ID2223 Scalable Machine Learning - Lab 2**

        This demo uses the Llama 3.2 3B Instruct model via HuggingFace Inference API.

        📦 Fine-tuned Model (GGUF): [Marcus719/Llama-3.2-3B-Instruct-FineTome-Lab2-GGUF](https://huggingface.co/Marcus719/Llama-3.2-3B-Instruct-FineTome-Lab2-GGUF)
        """
    )

    # Conversation display; history is tuple format: [[user, assistant], ...].
    chatbot = gr.Chatbot(label="Chat", height=450, show_copy_button=True)

    with gr.Row():
        msg = gr.Textbox(
            placeholder="Type your message here...",
            scale=4,
            container=False,
            autofocus=True
        )
        submit_btn = gr.Button("Send 🚀", scale=1, variant="primary")

    # Generation settings, collapsed by default.
    with gr.Accordion("⚙️ Settings", open=False):
        system_prompt = gr.Textbox(
            label="System Prompt",
            value=DEFAULT_SYSTEM_PROMPT,
            lines=2
        )
        with gr.Row():
            max_tokens = gr.Slider(64, 1024, value=512, step=32, label="Max Tokens")
            temperature = gr.Slider(0.1, 1.5, value=0.7, step=0.1, label="Temperature")
            top_p = gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top-p")

    with gr.Row():
        clear_btn = gr.Button("🗑️ Clear Chat")
        retry_btn = gr.Button("🔄 Regenerate")

    gr.Examples(
        examples=[
            "Hello! Can you introduce yourself?",
            "Explain machine learning in simple terms.",
            "What is the difference between fine-tuning and pre-training?",
            "Write a short poem about AI.",
        ],
        inputs=msg,
        label="💡 Try these examples"
    )

    # Event handlers
    def user_input(message, history):
        # Clear the textbox and append the user's turn with a pending reply.
        return "", history + [[message, None]]

    def bot_response(history, system_prompt, max_tokens, temperature, top_p):
        # Stream the assistant's reply into the last history slot.
        # Generator: each yield re-renders the chatbot with the partial text.
        if not history:
            return history
        message = history[-1][0]
        history_for_model = history[:-1]
        for response in chat(message, history_for_model, system_prompt, max_tokens, temperature, top_p):
            history[-1][1] = response
            yield history

    def retry_last(history, system_prompt, max_tokens, temperature, top_p):
        # Re-generate the assistant reply for the most recent user message.
        if history:
            history[-1][1] = None
            message = history[-1][0]
            history_for_model = history[:-1]
            for response in chat(message, history_for_model, system_prompt, max_tokens, temperature, top_p):
                history[-1][1] = response
                yield history

    # Both Enter-submit and the Send button use the same two-step flow:
    # append the user turn immediately (queue=False), then stream the reply.
    msg.submit(user_input, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot_response, [chatbot, system_prompt, max_tokens, temperature, top_p], chatbot
    )
    submit_btn.click(user_input, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot_response, [chatbot, system_prompt, max_tokens, temperature, top_p], chatbot
    )
    clear_btn.click(lambda: [], None, chatbot, queue=False)
    retry_btn.click(retry_last, [chatbot, system_prompt, max_tokens, temperature, top_p], chatbot)

    gr.Markdown(
        """
        ---
        ### 📝 About This Project

        **Fine-tuning Details:**
        - Base Model: `meta-llama/Llama-3.2-3B-Instruct`
        - Dataset: [FineTome-100k](https://huggingface.co/datasets/mlabonne/FineTome-100k)
        - Method: QLoRA (4-bit quantization + LoRA)
        - Framework: [Unsloth](https://github.com/unslothai/unsloth)

        Built with ❤️ for KTH ID2223 Lab 2
        """
    )
142
 
143
# Start the Gradio server only when this file is executed directly.
if __name__ == "__main__":
    demo.launch()