import torch
import streamlit as st
from transformers import pipeline

# Set the device to CUDA if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Set to 1 to load a locally fine-tuned model from trained_models/;
# any other value falls back to the TinyLlama chat checkpoint.
model_source = 0

if model_source == 1:
    pipe = pipeline(
        "text-generation",
        model="trained_models/",
        device=0 if device.type == "cuda" else -1,
    )
else:
    pipe = pipeline(
        "text-generation",
        model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
        torch_dtype=torch.bfloat16,
        device=0 if device.type == "cuda" else -1,
    )

input_text = st.text_input(label="prompt:")
# e.g. "respond as if you are explaining to a child"
context = st.text_input(label="provide context for the model... who/what should it be?")

messages = [
    {"role": "system", "content": context},
    {"role": "user", "content": input_text},
]

# Render the chat messages into the model's expected prompt format
prompt = pipe.tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)

if st.button("generate response"):
    # Generate a response
    outputs = pipe(
        prompt,
        max_new_tokens=250,
        do_sample=True,
        temperature=0.5,
        top_k=10,
        top_p=0.90,
    )
    # The generated text echoes the prompt; keep only what follows the
    # <|assistant|> marker inserted by TinyLlama's chat template.
    st.write(outputs[0]["generated_text"].split("<|assistant|>")[1])
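
# To launch the app (assuming this file is saved as app.py):
#   streamlit run app.py

# Optional: a minimal sketch of caching the model load with st.cache_resource,
# so Streamlit does not rebuild the pipeline on every widget interaction
# (each interaction reruns the whole script). `load_pipe` is a hypothetical
# helper, not part of the script above; to use it, replace the module-level
# `pipe = pipeline(...)` calls with `pipe = load_pipe()`.
@st.cache_resource
def load_pipe():
    return pipeline(
        "text-generation",
        model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
        torch_dtype=torch.bfloat16,
        device=0 if torch.cuda.is_available() else -1,
    )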