Marcus719 committed on
Commit
2a8403d
·
verified ·
1 Parent(s): d94e47a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -134
app.py CHANGED
@@ -1,141 +1,30 @@
1
  import gradio as gr
2
- from huggingface_hub import hf_hub_download
3
- from llama_cpp import Llama
4
 
5
- # ============================================
6
- # 配置区域 - KTH ID2223 Lab 2
7
- # ============================================
8
- MODEL_REPO = "Marcus719/Llama-3.2-3B-Instruct-FineTome-Lab2-GGUF"
9
- MODEL_FILENAME = "unsloth.Q4_K_M.gguf"
10
 
11
- # ============================================
12
- # 下载并加载模型
13
- # ============================================
14
- print(f"📥 Downloading model from {MODEL_REPO}...")
15
-
16
- model_path = hf_hub_download(
17
- repo_id=MODEL_REPO,
18
- filename=MODEL_FILENAME,
19
  )
20
- print(f"✅ Model downloaded: {model_path}")
21
-
22
- print("🔄 Loading model (this may take a minute on CPU)...")
23
 
24
- llm = Llama(
25
- model_path=model_path,
26
- n_ctx=2048,
27
- n_threads=2,
28
- n_gpu_layers=0,
29
- verbose=False
 
 
 
 
 
 
 
30
  )
31
 
32
- print("✅ Model loaded successfully!")
33
-
34
- # ============================================
35
- # Llama 3.2 Instruct 对话模板
36
- # ============================================
37
- def format_prompt(message: str, history: list, system_prompt: str) -> str:
38
- prompt = f"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{system_prompt}<|eot_id|>"
39
-
40
- for user_msg, assistant_msg in history:
41
- if user_msg:
42
- prompt += f"<|start_header_id|>user<|end_header_id|>\n\n{user_msg}<|eot_id|>"
43
- if assistant_msg:
44
- prompt += f"<|start_header_id|>assistant<|end_header_id|>\n\n{assistant_msg}<|eot_id|>"
45
-
46
- prompt += f"<|start_header_id|>user<|end_header_id|>\n\n{message}<|eot_id|>"
47
- prompt += f"<|start_header_id|>assistant<|end_header_id|>\n\n"
48
-
49
- return prompt
50
-
51
- # ============================================
52
- # 生成回复函数
53
- # ============================================
54
- def chat(message: str, history: list, system_prompt: str, max_tokens: int, temperature: float, top_p: float):
55
- prompt = format_prompt(message, history, system_prompt)
56
-
57
- response = ""
58
- stream = llm(
59
- prompt,
60
- max_tokens=max_tokens,
61
- temperature=temperature,
62
- top_p=top_p,
63
- stop=["<|eot_id|>", "<|end_of_text|>"],
64
- stream=True
65
- )
66
-
67
- for chunk in stream:
68
- token = chunk["choices"][0]["text"]
69
- response += token
70
- yield response
71
-
72
- # ============================================
73
- # Gradio 界面
74
- # ============================================
75
- DEFAULT_SYSTEM_PROMPT = "You are a helpful, respectful and honest assistant."
76
-
77
- with gr.Blocks(theme=gr.themes.Soft(), title="🦙 Llama 3.2 ChatBot") as demo:
78
-
79
- gr.Markdown(
80
- """
81
- # 🦙 Llama 3.2 3B - Fine-tuned on FineTome
82
- **KTH ID2223 Lab 2** | [Model](https://huggingface.co/Marcus719/Llama-3.2-3B-Instruct-FineTome-Lab2-GGUF)
83
- """
84
- )
85
-
86
- chatbot = gr.Chatbot(label="Chat", height=400, show_copy_button=True)
87
-
88
- with gr.Row():
89
- msg = gr.Textbox(placeholder="Type your message...", scale=4, container=False)
90
- submit_btn = gr.Button("Send 🚀", scale=1, variant="primary")
91
-
92
- with gr.Accordion("⚙️ Settings", open=False):
93
- system_prompt = gr.Textbox(label="System Prompt", value=DEFAULT_SYSTEM_PROMPT, lines=2)
94
- with gr.Row():
95
- max_tokens = gr.Slider(64, 512, value=256, step=32, label="Max Tokens")
96
- temperature = gr.Slider(0.1, 1.5, value=0.7, step=0.1, label="Temperature")
97
- top_p = gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-p")
98
-
99
- with gr.Row():
100
- clear_btn = gr.Button("🗑️ Clear")
101
- retry_btn = gr.Button("🔄 Retry")
102
-
103
- gr.Examples(
104
- examples=["Hello!", "Explain machine learning.", "What is fine-tuning?"],
105
- inputs=msg
106
- )
107
-
108
- def user_input(message, history):
109
- return "", history + [[message, None]]
110
-
111
- def bot_response(history, system_prompt, max_tokens, temperature, top_p):
112
- if not history:
113
- return history
114
- message = history[-1][0]
115
- history_for_model = history[:-1]
116
- for response in chat(message, history_for_model, system_prompt, max_tokens, temperature, top_p):
117
- history[-1][1] = response
118
- yield history
119
-
120
- def retry_last(history, system_prompt, max_tokens, temperature, top_p):
121
- if history:
122
- history[-1][1] = None
123
- message = history[-1][0]
124
- history_for_model = history[:-1]
125
- for response in chat(message, history_for_model, system_prompt, max_tokens, temperature, top_p):
126
- history[-1][1] = response
127
- yield history
128
-
129
- msg.submit(user_input, [msg, chatbot], [msg, chatbot], queue=False).then(
130
- bot_response, [chatbot, system_prompt, max_tokens, temperature, top_p], chatbot
131
- )
132
- submit_btn.click(user_input, [msg, chatbot], [msg, chatbot], queue=False).then(
133
- bot_response, [chatbot, system_prompt, max_tokens, temperature, top_p], chatbot
134
- )
135
- clear_btn.click(lambda: [], None, chatbot, queue=False)
136
- retry_btn.click(retry_last, [chatbot, system_prompt, max_tokens, temperature, top_p], chatbot)
137
-
138
- gr.Markdown("---\nBuilt with ❤️ | KTH ID2223 Lab 2")
139
-
140
- if __name__ == "__main__":
141
- demo.queue().launch()
 
1
  import gradio as gr
2
+ from transformers import AutoTokenizer, AutoModelForCausalLM
 
3
 
4
+ model_name = "Marcus719/Llama-3.2-3B-Instruct-Lab2"
 
 
 
 
5
 
6
+ # load tokenizer and model
7
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
8
+ model = AutoModelForCausalLM.from_pretrained(
9
+ model_name,
10
+ low_cpu_mem_usage=True,
11
+ device_map="auto"
 
 
12
  )
 
 
 
13
 
14
+ # define generate function
15
+ def generate_text(input_text):
16
+ inputs = tokenizer(input_text, return_tensors="pt")
17
+ outputs = model.generate(inputs["input_ids"], max_length=100, num_return_sequences=1)
18
+ return tokenizer.decode(outputs[0], skip_special_tokens=True)
19
+
20
+ # create gradio interface
21
+ interface = gr.Interface(
22
+ fn=generate_text,
23
+ inputs="text",
24
+ outputs="text",
25
+ title="Hugging Face model Demo",
26
+ description="say something"
27
  )
28
 
29
+ # launch the app
30
+ interface.launch()