Robert Castagna committed on
Commit
1bbf147
·
1 Parent(s): ae7cb95
Files changed (1) hide show
  1. app.py +14 -24
app.py CHANGED
"""Streamlit demo: free-form text generation with Mistral-7B via transformers."""
import streamlit as st

from transformers import AutoModelForCausalLM, AutoTokenizer

# Simple interactive widget demo.
x = st.slider('Select a value')
st.write(x, 'squared is', x * x)

# Load model + tokenizer at module import. NOTE(review): Streamlit reruns this
# script on every interaction; consider st.cache_resource to avoid reloading.
model = AutoModelForCausalLM.from_pretrained(
    "mistralai/Mistral-7B-v0.1", torch_dtype="auto", trust_remote_code=True
)
tokenizer = AutoTokenizer.from_pretrained(
    "mistralai/Mistral-7B-v0.1", trust_remote_code=True
)

# Mistral ships with no pad token; reuse EOS so padding works.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# FIX: st.text_input() requires a label argument — calling it with no
# arguments raises a TypeError at runtime.
input_text = st.text_input("Enter a prompt")

if st.button("generate response"):
    # FIX: don't run generation on an empty prompt.
    if not input_text:
        st.warning("Please enter a prompt first.")
    else:
        # Encode input text along with an attention mask.
        # FIX: truncation=True is needed alongside max_length/padding so
        # overlong prompts are cut instead of triggering warnings/errors.
        encoding = tokenizer(
            input_text,
            return_tensors='pt',
            max_length=150,
            padding='max_length',
            truncation=True,
        )
        attention_mask = encoding['attention_mask']
        inputs = encoding['input_ids']

        # Generate output using both input_ids and attention_mask.
        outputs = model.generate(
            inputs,
            attention_mask=attention_mask,
            max_new_tokens=50,
            num_return_sequences=1,
        )

        for i, output_id in enumerate(outputs):
            st.write(f"Generated text {i+1}: {tokenizer.decode(output_id, skip_special_tokens=True)}")
 
"""Streamlit demo: pirate-style chatbot backed by TinyLlama via a transformers pipeline."""
import streamlit as st
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Simple interactive widget demo.
x = st.slider('Select a value')
st.write(x, 'squared is', x * x)


@st.cache_resource
def _load_pipeline():
    """Build the text-generation pipeline once and reuse it across Streamlit reruns.

    FIX: the original constructed the pipeline at top level, so the model was
    re-downloaded/re-loaded on every widget interaction.
    """
    return pipeline(
        "text-generation",
        model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
        torch_dtype=torch.bfloat16,
        device_map="auto",
    )


pipe = _load_pipeline()

input_text = st.text_input(label='press generate')

if st.button("generate response"):
    # FIX: the original hard-coded the user message and ignored input_text
    # entirely. Use the user's prompt, falling back to the original demo
    # question when the box is empty (preserves prior default behavior).
    messages = [
        {
            "role": "system",
            "content": "You are a friendly chatbot who always responds in the style of a pirate",
        },
        {
            "role": "user",
            "content": input_text or "How many helicopters can a human eat in one sitting?",
        },
    ]

    # Render the chat turns into the model's expected prompt format.
    prompt = pipe.tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )

    outputs = pipe(
        prompt,
        max_new_tokens=256,
        do_sample=True,
        temperature=0.7,
        top_k=50,
        top_p=0.95,
    )

    # FIX: print() writes to the server console and never reaches the app;
    # st.write renders the result in the Streamlit UI.
    st.write(outputs[0]["generated_text"])