while-nalu committed on
Commit
98e5894
·
verified ·
1 Parent(s): 9592504

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +47 -19
app.py CHANGED
@@ -1,31 +1,59 @@
 
 
 
 
1
  from transformers import AutoModelForCausalLM, AutoTokenizer
2
- device = "cuda" # the device to load the model onto
3
 
 
 
 
 
4
  model = AutoModelForCausalLM.from_pretrained(
5
  "Qwen/Qwen1.5-0.5B-Chat",
6
  torch_dtype="auto",
7
  device_map="auto"
8
- )
9
  tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen1.5-0.5B-Chat")
10
 
11
- prompt = "Give me a short introduction to large language model."
 
 
 
 
12
  messages = [
13
  {"role": "system", "content": "You are a helpful assistant."},
14
- {"role": "user", "content": prompt}
15
- ]
16
- text = tokenizer.apply_chat_template(
17
- messages,
18
- tokenize=False,
19
- add_generation_prompt=True
20
- )
21
- model_inputs = tokenizer([text], return_tensors="pt").to(device)
22
-
23
- generated_ids = model.generate(
24
- model_inputs.input_ids,
25
- max_new_tokens=512
26
- )
27
- generated_ids = [
28
- output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
29
  ]
30
 
31
- response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ os.system("pip install transformers")
3
+
4
+ import streamlit as st
5
  from transformers import AutoModelForCausalLM, AutoTokenizer
 
6
 
7
+ # Set device
8
+ device = "cuda" if st.sidebar.checkbox("Use GPU", True) else "cpu"
9
+
10
+ # Load model and tokenizer
11
  model = AutoModelForCausalLM.from_pretrained(
12
  "Qwen/Qwen1.5-0.5B-Chat",
13
  torch_dtype="auto",
14
  device_map="auto"
15
+ ).to(device)
16
  tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen1.5-0.5B-Chat")
17
 
18
+ # Create a chatbot interface
19
+ st.title("Chatbot")
20
+ st.write("Ask me anything!")
21
+
22
+ # Initialize messages
23
  messages = [
24
  {"role": "system", "content": "You are a helpful assistant."},
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  ]
26
 
27
+ # Display chat history
28
+ for message in messages:
29
+ if message["role"] == "system":
30
+ st.write(f"*System*: {message['content']}")
31
+ elif message["role"] == "user":
32
+ st.write(f"*You*: {message['content']}")
33
+ elif message["role"] == "assistant":
34
+ st.write(f"*Assistant*: {message['content']}")
35
+
36
+ # Get user input
37
+ user_input = st.text_input("Your message")
38
+
39
+ # Generate response
40
+ if user_input:
41
+ messages.append({"role": "user", "content": user_input})
42
+ text = tokenizer.apply_chat_template(
43
+ messages,
44
+ tokenize=False,
45
+ add_generation_prompt=True
46
+ )
47
+ model_inputs = tokenizer([text], return_tensors="pt").to(device)
48
+ generated_ids = model.generate(
49
+ model_inputs.input_ids,
50
+ max_new_tokens=512
51
+ )
52
+ generated_ids = [
53
+ output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
54
+ ]
55
+ response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
56
+ messages.append({"role": "assistant", "content": response})
57
+
58
+ # Display response
59
+ st.write(f"*Assistant*: {response}")