import torch
from transformers import pipeline
import streamlit as st

# Run on the GPU when one is available, otherwise fall back to CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Model selection flag: 1 loads a locally fine-tuned model from trained_models/,
# anything else loads the TinyLlama chat model from the Hub
model_source = 10
if model_source == 1:
    pipe = pipeline("text-generation", model="trained_models/",
                    device=0 if device.type == "cuda" else -1)
else:
    pipe = pipeline("text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
                    torch_dtype=torch.bfloat16,
                    device=0 if device.type == "cuda" else -1)
input_text = st.text_input(label="prompt:")
# e.g. "respond as if you are explaining to a child"
context = st.text_input(label="provide context for the model... who/what should it be?")
messages = [
    {
        "role": "system",
        "content": context,
    },
    {"role": "user", "content": input_text},
]
# Render the chat messages into a single prompt string using the model's chat template
prompt = pipe.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
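# For this model the template renders roughly as (a sketch, assumed from the
# TinyLlama/Zephyr-style chat format):
#   <|system|>\n{context}</s>\n<|user|>\n{input_text}</s>\n<|assistant|>\n
# which is why the generated text is split on "<|assistant|>" below.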
| if st.button("generate response"): | |
| # Generate a response | |
| outputs = pipe(prompt, max_new_tokens=250, do_sample=True, temperature=0.5, top_k=10, top_p=0.90) | |
| st.write(outputs[0]["generated_text"].split('<|assistant|>')[1]) | |