Hugging Face Spaces — status: Sleeping
"""Streamlit chat app: answer queries with a user-selected Hugging Face causal LM.

The user picks a model id and generation parameters in the UI; the model is
loaded in 4-bit and wrapped in a LangChain ``HuggingFacePipeline`` for inference.
"""
import streamlit as st
from langchain_community.llms import HuggingFacePipeline  # was missing: used below
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Defaults applied when the corresponding text input is empty or invalid.
DEFAULT_MODEL_ID = "meta-llama/Llama-2-7b-chat-hf"
DEFAULT_MAX_NEW_TOKENS = 100
DEFAULT_TOP_K = 50
DEFAULT_TEMPERATURE = 0.1


def _parse(raw, cast, default):
    """Cast a text-input string with ``cast``; fall back to ``default`` on bad input."""
    try:
        return cast(raw)
    except (TypeError, ValueError):
        return default


# Pre-fill with the default model instead of overwriting it with "" as before.
model_id = st.text_input("Enter model_id", value=DEFAULT_MODEL_ID)

# st.text_input returns strings; convert to the numeric types the generation
# pipeline expects. (The original collected these but then hard-coded
# 100/50/0.1 in the pipeline call, silently ignoring user input.)
max_new_tokens = _parse(st.text_input("Enter max_new_tokens"), int, DEFAULT_MAX_NEW_TOKENS)
top_k = _parse(st.text_input("Enter top_k"), int, DEFAULT_TOP_K)  # label fixed (was "Enter max_new_tokens")
temperature = _parse(st.text_input("Enter temperature"), float, DEFAULT_TEMPERATURE)

query = st.chat_input("Enter your query")

# chat_input returns None until the user submits, and loading a model with an
# empty id raises — guard both before doing any heavy work.
if query and model_id:
    st.write(query)

    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        load_in_4bit=True,
        # attn_implementation="flash_attention_2",  # if you have an Ampere GPU
    )

    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=max_new_tokens,
        top_k=top_k,
        temperature=temperature,
    )
    llm = HuggingFacePipeline(pipeline=pipe)
    st.write(llm.invoke(query))