Robert Castagna committed on
Commit
6df050a
·
1 Parent(s): d485bba

add mistral code

Browse files
Files changed (1) hide show
  1. app.py +37 -1
app.py CHANGED
@@ -1,4 +1,40 @@
1
  import streamlit as st
2
 
3
  x = st.slider('Select a value')
4
- st.write(x, 'squared is', x * x)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Streamlit demo app: a squared-value slider plus text generation with
# Mistral-7B. Streamlit re-executes this whole script on every widget
# interaction, so anything expensive must be cached.

import streamlit as st
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_NAME = "mistralai/Mistral-7B-v0.1"
MAX_PROMPT_TOKENS = 150  # prompts longer than this are truncated
MAX_NEW_TOKENS = 50  # upper bound on tokens generated per request


@st.cache_resource
def load_model_and_tokenizer():
    """Load the model and tokenizer once and reuse them across reruns.

    Without caching, every slider move or button click would reload the
    multi-gigabyte checkpoint because Streamlit reruns the script top to
    bottom on each interaction.

    Returns:
        A ``(model, tokenizer, device)`` tuple, with the model already moved
        to ``device`` (CUDA when available, otherwise CPU).
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("Running on GPU: ", torch.cuda.is_available())

    # NOTE(review): trust_remote_code=True allows code shipped in the model
    # repository to run locally. It is kept to preserve the original
    # behavior; confirm whether it is actually needed for this checkpoint.
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME, torch_dtype="auto", trust_remote_code=True
    )
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)

    # Set the padding token if not already defined, so generate() has a
    # valid pad_token_id to use.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    model = model.to(device)
    model.eval()
    return model, tokenizer, device


x = st.slider('Select a value')
st.write(x, 'squared is', x * x)

model, tokenizer, device = load_model_and_tokenizer()

# st.text_input requires a label; calling it with no arguments raises TypeError.
input_text = st.text_input("Enter a prompt")

if st.button("generate response"):
    if not input_text.strip():
        st.warning("Please enter a prompt before generating a response.")
    else:
        # A single prompt needs no padding. Padding it to a fixed length would
        # place pad tokens between the prompt and the generated continuation.
        # truncation=True is what actually enforces max_length.
        encoding = tokenizer(
            input_text,
            return_tensors='pt',
            truncation=True,
            max_length=MAX_PROMPT_TOKENS,
        )

        # Move input tensors to the same device as the model.
        input_ids = encoding['input_ids'].to(device)
        attention_mask = encoding['attention_mask'].to(device)

        outputs = model.generate(
            input_ids,
            attention_mask=attention_mask,
            max_new_tokens=MAX_NEW_TOKENS,
            num_return_sequences=1,
            pad_token_id=tokenizer.pad_token_id,
        )

        for i, output_id in enumerate(outputs):
            generated = tokenizer.decode(output_id, skip_special_tokens=True)
            st.write(f"Generated text {i+1}: {generated}")