Robert Castagna committed on
Commit
1bbf147
·
1 Parent(s): ae7cb95
Files changed (1) hide show
  1. app.py +14 -24
app.py CHANGED
"""Streamlit demo: free-form text generation with Mistral-7B via transformers."""
import streamlit as st

from transformers import AutoModelForCausalLM, AutoTokenizer

# Simple interactive widget demo.
x = st.slider('Select a value')
st.write(x, 'squared is', x * x)

# Load model + tokenizer at module import. NOTE(review): Streamlit reruns this
# script on every interaction; consider st.cache_resource to avoid reloading.
model = AutoModelForCausalLM.from_pretrained(
    "mistralai/Mistral-7B-v0.1", torch_dtype="auto", trust_remote_code=True
)
tokenizer = AutoTokenizer.from_pretrained(
    "mistralai/Mistral-7B-v0.1", trust_remote_code=True
)

# Mistral ships with no pad token; reuse EOS so padding works.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# FIX: st.text_input() requires a label argument — calling it with no
# arguments raises a TypeError at runtime.
input_text = st.text_input("Enter a prompt")

if st.button("generate response"):
    # FIX: don't run generation on an empty prompt.
    if not input_text:
        st.warning("Please enter a prompt first.")
    else:
        # Encode input text along with an attention mask.
        # FIX: truncation=True is needed alongside max_length/padding so
        # overlong prompts are cut instead of triggering warnings/errors.
        encoding = tokenizer(
            input_text,
            return_tensors='pt',
            max_length=150,
            padding='max_length',
            truncation=True,
        )
        attention_mask = encoding['attention_mask']
        inputs = encoding['input_ids']

        # Generate output using both input_ids and attention_mask.
        outputs = model.generate(
            inputs,
            attention_mask=attention_mask,
            max_new_tokens=50,
            num_return_sequences=1,
        )

        for i, output_id in enumerate(outputs):
            st.write(f"Generated text {i+1}: {tokenizer.decode(output_id, skip_special_tokens=True)}")
 
"""Streamlit demo: pirate-style chatbot backed by TinyLlama via a transformers pipeline."""
import streamlit as st
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Simple interactive widget demo.
x = st.slider('Select a value')
st.write(x, 'squared is', x * x)


@st.cache_resource
def _load_pipeline():
    """Build the text-generation pipeline once and reuse it across Streamlit reruns.

    FIX: the original constructed the pipeline at top level, so the model was
    re-downloaded/re-loaded on every widget interaction.
    """
    return pipeline(
        "text-generation",
        model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
        torch_dtype=torch.bfloat16,
        device_map="auto",
    )


pipe = _load_pipeline()

input_text = st.text_input(label='press generate')

if st.button("generate response"):
    # FIX: the original hard-coded the user message and ignored input_text
    # entirely. Use the user's prompt, falling back to the original demo
    # question when the box is empty (preserves prior default behavior).
    messages = [
        {
            "role": "system",
            "content": "You are a friendly chatbot who always responds in the style of a pirate",
        },
        {
            "role": "user",
            "content": input_text or "How many helicopters can a human eat in one sitting?",
        },
    ]

    # Render the chat turns into the model's expected prompt format.
    prompt = pipe.tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )

    outputs = pipe(
        prompt,
        max_new_tokens=256,
        do_sample=True,
        temperature=0.7,
        top_k=50,
        top_p=0.95,
    )

    # FIX: print() writes to the server console and never reaches the app;
    # st.write renders the result in the Streamlit UI.
    st.write(outputs[0]["generated_text"])