Spaces:

RobertCastagna
/

FIN_LLM

Sleeping

Robert Castagna committed on Jan 4, 2024

Commit

a28382d

1 Parent(s): 6df050a

add mistral code

Files changed (1) hide show

app.py CHANGED Viewed

@@ -3,13 +3,13 @@ import streamlit as st
 x = st.slider('Select a value')
 st.write(x, 'squared is', x * x)
-import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
 # Set the device to CUDA if available
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-print("Running on GPU: ", torch.cuda.is_available())
 model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-v0.1", torch_dtype="auto", trust_remote_code=True)
 tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1", trust_remote_code=True)
@@ -19,7 +19,7 @@ if tokenizer.pad_token is None:
     tokenizer.pad_token = tokenizer.eos_token
 # Move model to the selected device
-model = model.to(device)
 #input_text = "What are the side effects of sunscreen?"
@@ -28,10 +28,10 @@ input_text = st.text_input()
 if st.button("generate response"):
     # Encode input text along with attention mask
     encoding = tokenizer(input_text, return_tensors='pt', max_length=150, padding='max_length')
-    attention_mask = encoding['attention_mask'].to(device)
     # Move input tensors to the same device as the model
-    inputs = encoding['input_ids'].to(device)
     # Generate output using both input_ids and attention_mask
     outputs = model.generate(inputs, attention_mask=attention_mask, max_new_tokens= 50, num_return_sequences=1)

 x = st.slider('Select a value')
 st.write(x, 'squared is', x * x)
+#import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
 # Set the device to CUDA if available
+#device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+#print("Running on GPU: ", torch.cuda.is_available())
 model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-v0.1", torch_dtype="auto", trust_remote_code=True)
 tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1", trust_remote_code=True)
     tokenizer.pad_token = tokenizer.eos_token
 # Move model to the selected device
+#model = model.to(device)
 #input_text = "What are the side effects of sunscreen?"
 if st.button("generate response"):
     # Encode input text along with attention mask
     encoding = tokenizer(input_text, return_tensors='pt', max_length=150, padding='max_length')
+    attention_mask = encoding['attention_mask'] #.to(device)
     # Move input tensors to the same device as the model
+    inputs = encoding['input_ids'] #.to(device)
     # Generate output using both input_ids and attention_mask
     outputs = model.generate(inputs, attention_mask=attention_mask, max_new_tokens= 50, num_return_sequences=1)